Coverage for src/oai/repository.py: 94%

170 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1from django.conf import settings 

2 

3from ptf.model_helpers import get_collection 

4 

5from . import oai_helpers 

6from .oai_helpers import OAIInfoVisitor 

7from .oai_helpers import OAIMetadataPrefixVisitor 

8from .oai_helpers import get_collections 

9from .oai_helpers import get_oai_item 

10 

# Page size for OAI list responses, read from the Django settings: used below as the
# "page_size" query parameter and as the step by which the cursor advances.
MAX_RESULT_SIZE = settings.MAX_RESULT_SIZE

12 

13 

class Result:
    """
    Iterate over the items passed in and build, for each one, the rendering
    context and the template name, based on the item itself and on the
    requested metadata format.

    The instance also carries the paging state (cursor, next cursor, total)
    needed to render the OAI ``<resumptionToken>`` element.
    """

    def __init__(
        self,
        setspec,
        results,
        cursor,
        restart,
        total,
        metaformat,
        fd,
        ud,
        repositoryIdentifier,
        header_only=False,
    ):
        """
        @param setspec: set name the list was restricted to (may be empty/None)
        @param results: iterable of items exposing ``accept()`` and ``classname``
        @param cursor: offset of the first item of this page
        @param restart: offset of the next page; falsy when there is no next page
        @param total: total number of items, ignoring pagination
        @param metaformat: metadata prefix, stored as ``self.px``
        @param fd: "from" date bound carried over into the resumption token
        @param ud: "until" date bound carried over into the resumption token
        @param repositoryIdentifier: value injected into every item context
        @param header_only: stored on the instance; not read by this class
        """
        self.setspec = setspec
        self.results = results
        self.cursor = cursor
        self.restart = restart
        self.total = total
        self.px = metaformat
        self.fd = fd
        self.ud = ud
        self.repositoryIdentifier = repositoryIdentifier
        self.header_only = header_only

    def __iter__(self):
        """Yield one context dict per item, completed with its template name."""
        for item in self.results:
            context = item.accept(OAIInfoVisitor(self.px))
            context["template"] = self.get_template(item)
            context["repositoryIdentifier"] = self.repositoryIdentifier
            yield context

    def get_template(self, object_):
        """
        Return the template path for an item: ``oai/<classname>_<prefix>.xml``,
        with the item's classname lower-cased.
        """
        return "oai/{}_{}.xml".format(object_.classname.lower(), self.px)

    def resumptionToken(self):
        """
        Return the ``<resumptionToken>`` XML fragment for this page.

        * no next page and cursor at 0 (single complete page): empty string
        * no next page otherwise (last page of a paginated list): an empty
          element carrying only the ``completeListSize``/``cursor`` attributes
        * next page available: an element whose text is
          ``px:restart:setspec:total:fd:ud`` (setspec left empty when unset)

        The token text is XML-escaped because its fields come from request
        parameters; for ordinary values the output is unchanged.

        @return: string
        """
        # Function-scope import so this block does not depend on the module header.
        from xml.sax.saxutils import escape

        # The protocol is peculiar about resumption tokens: the last page of a
        # paginated list still gets an (empty) token, a single page gets none.
        if not self.restart:
            if self.cursor == 0:
                return ""
            return '<resumptionToken completeListSize="{}" cursor="{}"/>'.format(
                self.total, self.cursor
            )

        fields = (
            self.px,
            self.restart,
            self.setspec or "",  # a falsy setspec is serialised as an empty field
            self.total,
            self.fd,
            self.ud,
        )
        payload = escape(":".join(str(field) for field in fields))
        return '<resumptionToken completeListSize="{}" cursor="{}">{}</resumptionToken>'.format(
            self.total, self.cursor, payload
        )

81 

82 

class OAIRepository:
    """
    Hold the state of one OAI request (metadata prefix, set, date bounds,
    cursor) and build the ``Result`` objects for the OAI verbs.

    ``Identify()`` returns nothing here, while ``get``, ``listids``,
    ``listrecs`` and ``listmetadataformats`` subscript its return value.
    NOTE(review): presumably concrete repositories override ``Identify`` to
    return a dict with at least "repositoryIdentifier" and "base_url" - confirm.
    """

    def __init__(self, base_url):
        self.base_url = base_url
        self.sets_descr = ""
        self.setspec = ""
        self.cursor = 0
        self.fromdate = ""
        self.untildate = ""
        self.px = None

    def Identify(self):
        """Placeholder: returns None in this base class."""
        return None

    def setmetaDataFormat(self, fmt):
        """Store the requested metadata prefix."""
        self.px = fmt

    def setSet(self, setspec):
        """Store the requested set name."""
        self.setspec = setspec

    def setfromDate(self, fd):
        """Store the "from" date bound."""
        self.fromdate = fd

    def setuntilDate(self, ud):
        """Store the "until" date bound."""
        self.untildate = ud

    def setresumptionToken(self, token):
        """
        Restore the request state from a resumption token of the form
        ``px:cursor:setspec:total:fromdate:untildate``.

        The setspec is stored before the cursor is checked, so it is updated
        even when the token is rejected.

        @param token: string
        @return: 1 when the state was restored, 0 when cursor >= total
        @raise ValueError: the token does not have exactly six ":"-separated
            fields, or cursor/total are not integers
        """
        px, raw_cursor, setspec, raw_total, fromdate, untildate = token.split(":")
        cursor = int(raw_cursor)
        total = int(raw_total)
        self.setspec = setspec
        if cursor >= total:
            return 0
        self.px = px
        self.cursor = cursor
        self.fromdate = fromdate
        self.untildate = untildate
        return 1

    def get(self, id_):
        """
        Look up the item designated by an OAI identifier and wrap it in a
        single-item Result (no set, no paging information).

        @param id_: OAI identifier
        @return: Result
        """
        item = get_oai_item(self.make_internal_id(id_))
        return Result(
            None,
            [item],
            "",
            "",
            "",
            self.px,
            "",
            "",
            repositoryIdentifier=self.Identify()["repositoryIdentifier"],
            header_only=False,
        )

    @staticmethod
    def has_set(setspec):
        """
        Tell whether a set exists: one of the dedicated sets, or a name for
        which ``get_collection`` returns a truthy value.

        @return: True or False
        """
        if setspec in ("NUMDAM", "NUMDAM_book", "gallica"):
            return True
        return bool(get_collection(setspec))

    @staticmethod
    def listsets():
        """
        @return: tuple (collections, dedicated sets); the dedicated sets are
            dicts with "pid" and "title" keys
        """
        dedicated_sets = (
            {"pid": "NUMDAM", "title": "NUMDAM in eudml-article2"},
            {"pid": "NUMDAM_book", "title": "NUMDAM_book in eudml - book2"},
            {"pid": "gallica", "title": "All Collections in oai_dc for Gallica BNF"},
        )
        return (get_collections(), dedicated_sets)

    @staticmethod
    def make_internal_id(id_):
        """
        Extract the internal id from an OAI identifier made of exactly three
        ":"-separated parts, e.g. ``oai:numdam.org:JTNB_XXX`` -> ``JTNB_XXX``.

        @return: the third part, or None for any other number of parts
        """
        parts = id_.split(":")
        if len(parts) == 3:
            return parts[2]
        return None

    def has_identifier(self, id_):
        """
        Tell whether ``get_oai_item`` finds an item for this OAI identifier.

        @return: True or False
        """
        return bool(get_oai_item(self.make_internal_id(id_)))

    def get_items(self, px, setspec):
        """
        Return the page of items matching the set and metadata prefix, cf.
        Mathdoc / Services-Projets / Numdam / Documents d'étude et
        développement / Etudes techniques / OAI_sur_la_PTF_et_etat_des_lieux.md

        @param px: metadata prefix; overridden for the NUMDAM/NUMDAM_book sets
        @param setspec: set name (may be empty)
        @return: list of items with pagination AND total count of items without pagination
        """
        params = {
            "cursor": self.cursor,
            "page_size": MAX_RESULT_SIZE,
            "fromdate": self.fromdate,
            "untildate": self.untildate,
        }

        col = None
        if setspec in ("NUMDAM", "NUMDAM_book"):
            # These dedicated sets impose their own metadata format.
            px = {"NUMDAM": "eudml-article2", "NUMDAM_book": "eudml-book2"}[setspec]
        elif setspec == "gallica":
            # The gallica set lists the collections themselves.
            items, total = get_collections(params)
            return items, total
        elif setspec:
            col = get_collection(setspec)

        if getattr(settings, "OAI_BY_DATE_PUBLISHED", False):
            items, total = oai_helpers.get_articles_by_date_published(params, col)
        else:
            # Dispatch to oai_helpers.get_items_<prefix>, "-" replaced by "_".
            fetch = getattr(oai_helpers, "get_items_{}".format(px.replace("-", "_")))
            items, total = fetch(params, col)

        return items, total

    def get_next_page(self, items, total):
        """
        @param items: items of the current page
        @param total: total count of items without pagination
        @return: cursor of the next page, or 0 when the current page reaches
            the end of the list
        """
        if self.cursor + len(items) >= total:
            return 0
        return self.cursor + MAX_RESULT_SIZE

    def _build_list_result(self, header_only):
        """Fetch the current page and wrap it, with its paging state, in a Result."""
        items, total = self.get_items(self.px, self.setspec)
        restart = self.get_next_page(items, total)
        return Result(
            self.setspec,
            items,
            self.cursor,
            restart,
            total,
            self.px,
            self.fromdate,
            self.untildate,
            repositoryIdentifier=self.Identify()["repositoryIdentifier"],
            header_only=header_only,
        )

    def listids(self):
        """@return: Result for the current page, flagged header_only=True"""
        return self._build_list_result(header_only=True)

    def listrecs(self):
        """@return: Result for the current page, flagged header_only=False"""
        return self._build_list_result(header_only=False)

    @staticmethod
    def has_format(px, object_=None, setspec=None):
        """
        Tell whether the format is supported for the given set or object.

        @param px: metadata prefix
        @param object_: optional item, exposing ``pid`` and ``classname``
        @param setspec: optional set name
        @return: True or False
        """
        dedicated_sets = {
            "eudml-book2": ["NUMDAM_book"],
            "eudml-article2": ["NUMDAM"],
            "oai_dc": ["NUMDAM_book", "NUMDAM", "gallica"],
        }
        if px not in dedicated_sets:
            return False

        value = True
        if object_:  # check against the type of object
            fetch = getattr(oai_helpers, "get_items_{}".format(px.replace("-", "_")))
            items = fetch(pid=object_.pid)
            if len(items) != 1:
                value = False
            # special case: the item is a collection
            if object_.classname == "Collection" and px == "oai_dc":
                value = True

        if setspec in dedicated_sets[px]:
            # dedicated set that does not stand for a collection: nothing to look up
            setspec = None
        if setspec:
            value = get_collection(setspec) is not None

        return value

    def listmetadataformats(self, identifier=None):
        """
        List the metadata formats of the repository, or of one item.

        @param identifier: optional OAI identifier
        @return: list of dicts with "prefix", "schema" and "namespace" keys;
            None when an identifier is given but the item is not found or the
            visitor reports no format
        @raise KeyError: the visitor reports a format name that is not known here
        """
        oai_dc = {
            "prefix": "oai_dc",
            "schema": "http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
            "namespace": "http://www.openarchives.org/OAI/2.0/oai_dc/",
        }
        eudml_article2 = {
            "prefix": "eudml-article2",
            "schema": "http://eudml.org/schema/2.0/eudml-article-2.0.xsd",
            "namespace": "http://jats.nlm.nih.gov",
        }
        eudml_book2 = {
            "prefix": "eudml-book2",
            "schema": "http://eudml.org/schema/2.0/eudml-book-2.0.xsd",
            "namespace": "http://eudml.org/schema/2.0/eudml-book",
        }

        if identifier is None:
            repository = self.Identify()
            if repository["repositoryIdentifier"] == "centre-mersenne.org":
                if repository["base_url"] != "proceedings.centre-mersenne.org/oai":
                    return [oai_dc, eudml_article2]
            return [oai_dc, eudml_article2, eudml_book2]

        item = get_oai_item(self.make_internal_id(identifier))
        if not item:
            return None
        formats = item.accept(OAIMetadataPrefixVisitor())
        if not formats:
            return None

        # Explicit lookup table, keyed by the underscore spelling of the name;
        # the hyphenated prefix spelling is normalised so both are accepted.
        known_formats = {
            "oai_dc": oai_dc,
            "eudml_article2": eudml_article2,
            "eudml_book2": eudml_book2,
        }
        return [known_formats[name.replace("-", "_")] for name in formats]