Coverage for src/oai/oai_helpers.py: 72%

195 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1from django.conf import settings 

2from django.db.models import Q 

3from django.urls import reverse 

4 

5from ptf.model_helpers import get_first_last_years 

6from ptf.model_helpers import get_resource 

7from ptf.model_helpers import get_volumes_in_collection 

8from ptf.model_helpers import parse_date_str 

9from ptf.models import Article 

10from ptf.models import Collection 

11from ptf.models import Resource 

12 

13 

def get_oai_item(pid):
    """
    Return the resource identified by ``pid`` when it may be exposed, else None.

    The resource is looked up with ``get_resource``. A Container is cast to its
    concrete class; it is rejected (None is returned) when its ctype is
    "book-lecture-notes", "book-series" or "book-edited-book" and it holds no
    article. Any other resource that was found is returned unchanged.

    @param pid: identifier handed to ``get_resource``
    @return: item OR None
    """
    item = get_resource(pid)
    if not item or item.classname != "Container":
        return item

    container = item.cast()
    article_count = container.article_set.count()
    hidden_when_empty = container.ctype in (
        "book-lecture-notes",
        "book-series",
        "book-edited-book",
    )
    if hidden_when_empty and article_count == 0:
        return None
    return container

37 

38 

def get_articles_by_date_published(params, col):
    """
    Return the articles of the current site that have a publication date.

    According to the original documentation this is the query used when the
    OAI_BY_DATE_PUBLISHED setting is True, so that no online-first article
    (one without ``date_published``) is returned.

    @param params: mapping of optional filters; the keys read are "fromdate"
        and "untildate" (date restriction on ``date_published``) and
        "cursor" / "page_size" (pagination, applied only when both are present)
    @param col: optional collection; when truthy only its articles are kept
    @return: (queryset of Articles, total count before pagination)
    """
    criteria = {"classname": "Article", "date_published__isnull": False}
    if col:
        criteria["my_container__my_collection"] = col

    qs = (
        Article.objects.filter(Q(**criteria))
        .filter(sites__id=settings.SITE_ID)
        .exclude(classname="TranslatedArticle")
        .order_by("id")
    )

    # Either bound may be missing: .get() avoids the KeyError that indexing
    # raised when only one of the two keys was supplied.
    fromdate = params.get("fromdate")
    untildate = params.get("untildate")
    if fromdate:
        qs = qs.filter(date_published__gte=parse_date_str(fromdate))
    if untildate:
        # NOTE(review): unlike restrictResource_by_date, the upper bound is not
        # pushed to the end of the day here - confirm this is intended.
        qs = qs.filter(date_published__lte=parse_date_str(untildate))

    total = qs.count()  # needed to build the resumptionToken
    if "cursor" in params and "page_size" in params:
        qs = restrict_for_pagination(qs, params["cursor"], params["page_size"])
    return qs, total

75 

76 

def restrictResource_by_date(qs, fromdate=None, untildate=None):
    """
    Restrict ``qs`` to resources whose site membership ``deployed`` date lies in a range.

    @param qs: queryset filtered on ``sitemembership__deployed``
    @param fromdate: optional lower bound, parsed with ``parse_date_str``
    @param untildate: optional upper bound, parsed with ``parse_date_str`` and
        moved to 23:59:59 so that the whole day is included
    @return: the (possibly) filtered queryset
    """
    if fromdate:
        lower_bound = parse_date_str(fromdate)
        qs = qs.filter(sitemembership__deployed__gte=lower_bound)
    if untildate:
        upper_bound = parse_date_str(untildate).replace(hour=23, minute=59, second=59)
        qs = qs.filter(sitemembership__deployed__lte=upper_bound)
    return qs

87 

88 

def restrict_for_pagination(qs, cursor, page_size):
    """
    Return the page of ``qs`` starting at ``cursor`` with at most ``page_size`` items.

    Slicing already stops at the end of the data, so no prior ``count()`` is
    needed to clamp the upper bound; this saves one COUNT query per page
    (callers compute the total themselves for the resumptionToken).

    @param qs: sliceable queryset (or sequence)
    @param cursor: index of the first item of the page
    @param page_size: maximum number of items in the page
    @return: the sliced queryset
    """
    return qs[cursor : cursor + page_size]

99 

100 

def restrict_qs(qs, params):
    """
    Apply the optional date restriction and pagination described by ``params``.

    @param qs: queryset to restrict
    @param params: mapping; the keys read are "fromdate" / "untildate" (either
        may be absent or empty) and "cursor" / "page_size" (pagination is
        applied only when both are present)
    @return: (restricted queryset, total count before pagination)
    """
    # .get() tolerates a mapping holding only one of the two date keys;
    # indexing both keys raised KeyError in that case.
    fromdate = params.get("fromdate")
    untildate = params.get("untildate")
    if fromdate or untildate:
        qs = restrictResource_by_date(qs, fromdate, untildate)

    total = qs.count()  # needed to build the resumptionToken
    if "cursor" in params and "page_size" in params:
        qs = restrict_for_pagination(qs, params["cursor"], params["page_size"])
    return qs, total

108 

109 

def get_collections(params=None):
    """
    Return every collection of the current site, ordered by id.

    @param params: optional filters for pagination and date restriction, e.g.
        {'cursor': self.cursor,
         'page_size': MAX_RESULT_SIZE,
         'fromdate': self.fromdate,
         'untildate': self.untildate}
    @return: the queryset alone when ``params`` is empty or None, otherwise a
        (queryset, total) tuple as produced by ``restrict_qs``
    """
    collections = Collection.objects.filter(sites__id=settings.SITE_ID).order_by("id")
    if not params:
        return collections.all()

    page, total = restrict_qs(collections, params)
    return page.all(), total

126 

127 

def get_items_eudml_article2(params=None, col=None, pid=None):
    """
    Return the articles of the current site whose container is an issue
    (i.e. articles that are not part of a book), ordered by id.

    @param params: optional filters for pagination and date restriction
    @param col: optional collection the articles must belong to
    @param pid: optional pid the articles must match
    @return: the queryset alone when ``params`` is empty or None, otherwise a
        (queryset, total) tuple as produced by ``restrict_qs``
    """
    base_filters = {"my_container__ctype": "issue", "sites__id": settings.SITE_ID}
    articles = Article.objects.filter(**base_filters).order_by("id")

    if col:
        articles = articles.filter(my_container__my_collection=col)
    if pid:
        articles = articles.filter(pid=pid)

    if not params:
        return articles.all()

    page, total = restrict_qs(articles, params)
    return page.all(), total

146 

147 

def get_items_eudml_book2(params=None, col=None, pid=None):
    """
    Return the resources exposed in the eudml-book2 format, ordered by id.

    Two kinds of resources of the current site are selected:
    - Articles (book parts) whose container is a "book-edited-book": one
      record per book part;
    - Containers: one record per container. With ``col`` only
      "book-monograph" and "book-lecture-notes" containers of that collection
      are kept; without ``col`` "book-series" and "book-edited-book"
      containers are selected as well.

    NOTE(review): the container ctype list differs depending on ``col`` -
    confirm that this asymmetry is intended.

    @param params: optional filters for pagination and date restriction
    @param col: optional collection the resources must belong to
    @param pid: optional pid the resources must match
    @return: the queryset alone when ``params`` is empty or None, otherwise a
        (queryset, total) tuple as produced by ``restrict_qs``
    """
    part_criteria = {
        "classname": "Article",
        "article__my_container__ctype": "book-edited-book",
    }
    if col:
        part_criteria["article__my_container__my_collection"] = col
        book_criteria = {
            "classname": "Container",
            "container__ctype__in": ["book-monograph", "book-lecture-notes"],
            "container__my_collection": col,
        }
    else:
        book_criteria = {
            "classname": "Container",
            "container__ctype__in": [
                "book-monograph",
                "book-lecture-notes",
                "book-series",
                "book-edited-book",
            ],
        }

    selection = Q(**part_criteria) | Q(**book_criteria)
    resources = (
        Resource.objects.filter(selection).filter(sites__id=settings.SITE_ID).order_by("id")
    )

    if pid:
        resources = resources.filter(pid=pid)

    if not params:
        return resources.all()

    page, total = restrict_qs(resources, params)
    return page.all(), total

190 

191 

192# def get_objects_from_collection(col, params=None): 

193# """ 

194# retourne les objects contenu dans une collection : 

195# pour une book-series ou des lecture-notes : retourne des containers 

196# pour le reste retourne des articles 

197# @param params: filtres optionnels pour la pagination et la restriction par date 

198# @return: 

199# """ 

200# 

201# # TODO : these : classe derivee ? 

202# if col.coltype in ['book-series', 'these', 'lecture-notes'] : 

203# qs = Container.objects.filter(sites__id=settings.SITE_ID, my_collection=col).order_by('id') 

204# else: 

205# qs = Article.objects.filter( sites__id=settings.SITE_ID, my_container__my_collection=col).order_by('id') 

206# if params: 

207# qs, total = restrict_qs(qs, params) 

208# return qs.all(), total 

209# return qs.all() 

210 

211 

def get_items_oai_dc(params=None, col=None, pid=None):
    """
    Return the resources exposed in the oai_dc format, ordered by id.

    Selected resources of the current site:
    - Articles, except those whose container is a "book-monograph" or a
      "book-lecture-notes" (such books are exposed at container level);
    - Containers whose ctype is "book-monograph", "book-lecture-notes",
      "book-edited-book" or "book-series".

    @param params: optional filters for pagination and date restriction
    @param col: optional collection the resources must belong to
    @param pid: optional pid the resources must match
    @return: the queryset alone when ``params`` is empty or None, otherwise a
        (queryset, total) tuple as produced by ``restrict_qs``
    """
    article_criteria = {"classname": "Article"}
    container_criteria = {
        "classname": "Container",
        "container__ctype__in": [
            "book-monograph",
            "book-lecture-notes",
            "book-edited-book",
            "book-series",
        ],
    }
    if col:
        article_criteria["article__my_container__my_collection"] = col
        container_criteria["container__my_collection"] = col

    # Parts of monographs / lecture notes are never returned individually.
    not_monograph_part = ~Q(
        classname="Article",
        article__my_container__ctype__in=["book-monograph", "book-lecture-notes"],
    )

    selection = (Q(**article_criteria) & not_monograph_part) | Q(**container_criteria)
    resources = (
        Resource.objects.filter(selection).filter(sites__id=settings.SITE_ID).order_by("id")
    )

    if pid:
        resources = resources.filter(pid=pid)

    if not params:
        return resources.all()

    page, total = restrict_qs(resources, params)
    return page.all(), total

256 

257 

def get_publishers_for_GALLICA(collection):
    """
    Return the successive publishers of a collection with their year range.

    The containers of the collection are walked in id order. Consecutive
    containers sharing the same publisher name are merged into one entry whose
    "datedeb" / "datefin" cover all their years; a change of publisher name
    inserts a new entry at the head of the list, so the result lists the most
    recently encountered publisher first. Containers without publisher are
    ignored. Years are compared as strings.

    @param collection: Collection whose ``content`` is iterated
    @return: list of {"name", "datedeb", "datefin"} dicts
    """
    publishers = []

    for container in collection.content.all().order_by("id"):
        container = container.cast()

        # A year may be written YYYY-YYYY: issue holding articles of 2 years.
        year_parts = container.year.split("-")
        if len(year_parts) > 1:
            datedeb, datefin = year_parts[0], year_parts[1]
        else:
            datedeb = datefin = container.year

        publisher = container.my_publisher
        if not publisher:
            continue

        current = publishers[0] if publishers else None
        if current is None or publisher.pub_name != current["name"]:
            publishers.insert(
                0,
                {
                    "name": publisher.pub_name,
                    "datedeb": datedeb,
                    "datefin": datefin,
                },
            )
        else:
            # Same publisher as the previous container: widen its year range.
            if datedeb < current["datedeb"]:
                current["datedeb"] = datedeb
            if datefin > current["datefin"]:
                current["datefin"] = datefin

    return publishers

301 

302 

def get_containers_for_GALLICA(collection):
    """
    Return the volumes of a journal (or the books of a collection), keyed by volume.

    The result maps ``container.volume_int`` to
        {'title': container.title_tex,
         'index': container.volume_int,
         'vseries': container.vseries_int,
         'fyear': int, first year,
         'lyear': int, last year}
    When several containers share the same ``volume_int``, the year range is
    widened to cover all of them and title / vseries / index are taken from
    the last container visited (containers are walked in id order).

    @param collection: Collection whose ``content`` is iterated
    @return: dict of volumes
    """
    # TODO: this is a dict, not an ordered list - switch to a list.
    volumes = {}

    for container in collection.content.all().order_by("id"):
        first, last = get_first_last_years(container.year)
        first = int(first)
        last = int(last)

        key = container.volume_int
        if key not in volumes:
            volumes[key] = {
                "title": container.title_tex,
                "index": container.volume_int,
                "vseries": container.vseries_int,
                "fyear": first,
                "lyear": last,
            }
            continue

        # Volume already seen: widen its range and refresh its description.
        entry = volumes[key]
        if entry["fyear"] > first:
            entry["fyear"] = first
        if entry["lyear"] < last:
            entry["lyear"] = last
        entry["title"] = container.title_tex
        entry["vseries"] = container.vseries_int
        entry["index"] = container.volume_int

    return volumes

359 

360 

class OAIInfoVisitor:
    """
    Build the dictionary describing a resource for a given OAI metadata prefix.

    ``visit`` dispatches on ``resource.classname`` to ``visitArticle``,
    ``visitCollection`` or ``visitContainer``.
    """

    def __init__(self, px):
        # Metadata prefix; compared below with "eudml-article2" and "eudml-book2".
        self.px = px

    def visit(self, resource):
        """Call the ``visit<classname>`` method matching ``resource.classname``."""
        meth = getattr(self, "visit" + resource.classname)
        return meth(resource)

    def visitArticle(self, article):
        """
        @return: {"item": article, "setspec": [...]} where setspec ends with
            the pid of the article's collection
        """
        setspec = []
        # EuDML harvests Numdam metadata and expects a "NUMDAM" set
        if self.px == "eudml-article2" and settings.SITE_NAME == "numdam":
            setspec.append("NUMDAM")
        if self.px == "eudml-book2" and settings.SITE_NAME == "numdam":
            setspec.append("NUMDAM_book")

        setspec.append(article.my_container.my_collection.pid)

        return {"item": article, "setspec": setspec}

    def visitCollection(self, collection):
        """
        @return: dict with the collection, its publishers, its issues grouped
            by series, its volume count, its "journal-issues" URL, its icon
            and the fixed ["gallica"] setspec
        """
        context = get_volumes_in_collection(collection)
        return {
            "item": collection,
            "publishers": context["publishers"],
            "issues_in_vseries": context["sorted_issues"],
            "vols_number": context["volume_count"],
            "url": "{}{}".format(
                settings.SITE_DOMAIN, reverse("journal-issues", args=[collection.pid])
            ),
            "icon": collection.icon(),
            "setspec": ["gallica"],
        }

    def visitContainer(self, container):
        """
        @return: {"item", "setspec", "references"}; ``references`` is True
            only for the "eudml-book2" prefix, when the container is a
            monograph / lecture notes and at least one of its book parts has
            a bibliography
        """
        # any() stops at the first book part with bibitems instead of issuing
        # one COUNT query per remaining part; it is False for an empty
        # article_set, so no separate emptiness check is required.
        references = (
            self.px == "eudml-book2"
            and container.ctype in ["book-monograph", "book-lecture-notes"]
            and any(
                article.bibitem_set.count() > 0 for article in container.article_set.all()
            )
        )

        return {
            "item": container,
            "setspec": ["NUMDAM_book", container.my_collection.pid],
            "references": references,
        }

416 

417 

class OAIMetadataPrefixVisitor:
    """
    Give the list of metadata prefixes available for a resource.

    ``visit`` dispatches on ``resource.classname``; each ``visit*`` method
    returns a list of prefix names, or None when the resource has none.
    """

    def visit(self, resource):
        """Call the ``visit<classname>`` method matching ``resource.classname``."""
        handler = getattr(self, "visit" + resource.classname)
        return handler(resource)

    def visitArticle(self, article):
        """Prefixes of an article, chosen from the ctype of its container."""
        prefixes_by_ctype = {
            "book-edited-book": ["oai_dc", "eudml_book2"],
            "issue": ["oai_dc", "eudml_article2"],
        }
        return prefixes_by_ctype.get(article.my_container.ctype)

    def visitCollection(self, collection):
        """A collection is only available in oai_dc."""
        return ["oai_dc"]

    def visitContainer(self, container):
        """Prefixes of a container: only monographs and lecture notes have some."""
        if container.ctype in ("book-monograph", "book-lecture-notes"):
            return ["oai_dc", "eudml_book2"]
        return None