Coverage for src/oai/oai_helpers.py: 72%
195 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1from django.conf import settings
2from django.db.models import Q
3from django.urls import reverse
5from ptf.model_helpers import get_first_last_years
6from ptf.model_helpers import get_resource
7from ptf.model_helpers import get_volumes_in_collection
8from ptf.model_helpers import parse_date_str
9from ptf.models import Article
10from ptf.models import Collection
11from ptf.models import Resource
def get_oai_item(pid):
    """
    Return the resource identified by ``pid`` if it may be served as an OAI item.

    Whatever ``get_resource`` yields is returned unchanged unless its
    ``classname`` is "Container". A container is cast to its concrete class and
    hidden (``None`` is returned) when its ctype is "book-lecture-notes",
    "book-series" or "book-edited-book" and it has no article.

    @param pid: identifier handed to ``get_resource``
    @return: item OR None
    """
    resource = get_resource(pid)
    if not resource or resource.classname != "Container":
        return resource

    container = resource.cast()
    article_count = container.article_set.count()
    hidden_when_empty = ("book-lecture-notes", "book-series", "book-edited-book")
    if container.ctype in hidden_when_empty and article_count == 0:
        return None
    return container
def get_articles_by_date_published(params, col):
    """
    Return the articles that have a publication date, optionally filtered.

    According to the original note this is the query used when
    OAI_BY_DATE_PUBLISHED = True in settings; since only articles with a
    ``date_published`` are kept, online-first articles are left out.

    @param params: dict of optional filters. Recognised keys:
                   "fromdate" / "untildate" (date strings given to
                   ``parse_date_str``; each one is optional and may be absent
                   or empty), "cursor" and "page_size" (pagination, applied
                   only when both keys are present)
    @param col: collection to restrict the articles to, or a falsy value for
                no restriction
    @return: (queryset, total) where total is the number of matching articles
             before pagination
    """
    lookup = {"classname": "Article", "date_published__isnull": False}
    if col:
        lookup["my_container__my_collection"] = col

    qs = (
        Article.objects.filter(Q(**lookup))
        .filter(sites__id=settings.SITE_ID)
        .exclude(classname="TranslatedArticle")
        .order_by("id")
    )

    # .get() instead of indexing: a params dict carrying only one of the two
    # date keys must not raise KeyError.
    fromdate = params.get("fromdate")
    untildate = params.get("untildate")
    if fromdate:
        qs = qs.filter(date_published__gte=parse_date_str(fromdate))
    if untildate:
        qs = qs.filter(date_published__lte=parse_date_str(untildate))

    total = qs.count()  # needed for the resumptionToken
    if "cursor" in params and "page_size" in params:
        qs = restrict_for_pagination(qs, params["cursor"], params["page_size"])
    return qs, total
def restrictResource_by_date(qs, fromdate=None, untildate=None):
    """
    Narrow ``qs`` on ``sitemembership__deployed`` using optional date bounds.

    @param qs: queryset to filter
    @param fromdate: lower bound as a string understood by ``parse_date_str``;
                     ignored when falsy
    @param untildate: upper bound, same format; ignored when falsy. Its time
                      is moved to 23:59:59 before filtering.
    @return: the filtered queryset (``qs`` itself when no bound is given)
    """
    if fromdate:
        qs = qs.filter(sitemembership__deployed__gte=parse_date_str(fromdate))
    if untildate:
        end_of_day = parse_date_str(untildate).replace(hour=23, minute=59, second=59)
        qs = qs.filter(sitemembership__deployed__lte=end_of_day)
    return qs
def restrict_for_pagination(qs, cursor, page_size):
    """
    Return the page of ``qs`` that starts at ``cursor``.

    The slice end is not clamped to the number of rows: slicing past the end
    simply yields the remaining items (an empty result when ``cursor`` is
    beyond the end), so no count of ``qs`` is needed to build the page.

    @param qs: queryset (or any sliceable sequence) to paginate
    @param cursor: index of the first item of the page
    @param page_size: maximum number of items in the page
    @return: the sliced queryset / sequence
    """
    return qs[cursor : cursor + page_size]
def restrict_qs(qs, params):
    """
    Apply the optional date restriction and pagination described by ``params``.

    @param qs: queryset to restrict
    @param params: dict of optional filters. "fromdate" / "untildate" trigger
                   the date restriction (either key may be missing);
                   "cursor" and "page_size" trigger pagination when both are
                   present.
    @return: (queryset, total) where total is the number of rows after the
             date restriction and before pagination
    """
    if "fromdate" in params or "untildate" in params:
        # .get(): a params dict with only one of the two keys must not raise
        # KeyError; the missing bound is passed as None and then ignored.
        qs = restrictResource_by_date(qs, params.get("fromdate"), params.get("untildate"))
    total = qs.count()  # needed for the resumptionToken
    if "cursor" in params and "page_size" in params:
        qs = restrict_for_pagination(qs, params["cursor"], params["page_size"])
    return qs, total
def get_collections(params=None):
    """
    Return every collection of the current site, ordered by id.

    @param params: optional dict of filters for pagination and date
                   restriction, e.g.
                   {'cursor': self.cursor,
                    'page_size': MAX_RESULT_SIZE,
                    'fromdate': self.fromdate,
                    'untildate': self.untildate}
    @return: the queryset alone when ``params`` is falsy, otherwise the tuple
             (queryset, total) produced through ``restrict_qs``
    """
    collections = Collection.objects.filter(sites__id=settings.SITE_ID).order_by("id")
    if not params:
        return collections.all()

    collections, total = restrict_qs(collections, params)
    return collections.all(), total
def get_items_eudml_article2(params=None, col=None, pid=None):
    """
    Return the articles of the current site whose container ctype is "issue"
    (i.e. articles that are not part of a book), ordered by id.

    @param params: optional dict of filters for pagination and date restriction
    @param col: optional collection the articles must belong to
    @param pid: optional pid the article must have
    @return: the queryset alone when ``params`` is falsy, otherwise the tuple
             (queryset, total) produced through ``restrict_qs``
    """
    articles = Article.objects.filter(my_container__ctype="issue", sites__id=settings.SITE_ID)
    articles = articles.order_by("id")
    if col:
        articles = articles.filter(my_container__my_collection=col)
    if pid:
        articles = articles.filter(pid=pid)

    if not params:
        return articles.all()

    articles, total = restrict_qs(articles, params)
    return articles.all(), total
def get_items_eudml_book2(params=None, col=None, pid=None):
    """
    Return the resources exposed in the eudml-book2 format.

    Per the original note, monographs are returned at container level (one
    record per container, with its book-parts) and edited books at book-part
    level (one record per book-part), falling back to the container when an
    edited book has no book-part.

    The query selects Articles whose container ctype is "book-edited-book"
    together with Containers of the listed book ctypes.

    @param params: optional dict of filters for pagination and date restriction
    @param col: optional collection the resources must belong to
    @param pid: optional pid the resource must have
    @return: the queryset alone when ``params`` is falsy, otherwise the tuple
             (queryset, total) produced through ``restrict_qs``
    """
    book_part_lookup = {
        "classname": "Article",
        "article__my_container__ctype": "book-edited-book",
    }
    book_lookup = {"classname": "Container"}

    if col:
        book_part_lookup["article__my_container__my_collection"] = col
        book_lookup["container__my_collection"] = col
        # NOTE(review): with a collection only these two ctypes are kept, while
        # the unrestricted branch below also keeps series and edited books.
        book_lookup["container__ctype__in"] = ["book-monograph", "book-lecture-notes"]
    else:
        book_lookup["container__ctype__in"] = [
            "book-monograph",
            "book-lecture-notes",
            "book-series",
            "book-edited-book",
        ]

    wanted = Q(**book_part_lookup) | Q(**book_lookup)
    resources = Resource.objects.filter(wanted).filter(sites__id=settings.SITE_ID).order_by("id")

    if pid:
        resources = resources.filter(pid=pid)

    if not params:
        return resources.all()

    resources, total = restrict_qs(resources, params)
    return resources.all(), total
192# def get_objects_from_collection(col, params=None):
193# """
194# retourne les objects contenu dans une collection :
195# pour une book-series ou des lecture-notes : retourne des containers
196# pour le reste retourne des articles
197# @param params: filtres optionnels pour la pagination et la restriction par date
198# @return:
199# """
200#
201# # TODO : these : classe derivee ?
202# if col.coltype in ['book-series', 'these', 'lecture-notes'] :
203# qs = Container.objects.filter(sites__id=settings.SITE_ID, my_collection=col).order_by('id')
204# else:
205# qs = Article.objects.filter( sites__id=settings.SITE_ID, my_container__my_collection=col).order_by('id')
206# if params:
207# qs, total = restrict_qs(qs, params)
208# return qs.all(), total
209# return qs.all()
def get_items_oai_dc(params=None, col=None, pid=None):
    """
    Return the resources exposed in the oai_dc format.

    The query keeps Articles, except those whose container ctype is
    "book-monograph" or "book-lecture-notes", plus the Containers whose ctype
    is one of the four book ctypes listed below.

    @param params: optional dict of filters for pagination and date restriction
    @param col: optional collection the resources must belong to
    @param pid: optional pid the resource must have
    @return: list of Resources: the queryset alone when ``params`` is falsy,
             otherwise the tuple (queryset, total) produced through
             ``restrict_qs``
    """
    article_lookup = {"classname": "Article"}
    book_lookup = {
        "classname": "Container",
        "container__ctype__in": [
            "book-monograph",
            "book-lecture-notes",
            "book-edited-book",
            "book-series",
        ],
    }
    if col:
        article_lookup["article__my_container__my_collection"] = col
        book_lookup["container__my_collection"] = col

    is_article = Q(**article_lookup)
    is_book = Q(**book_lookup)
    # Parts of monographs / lecture notes are not returned individually.
    not_monograph_part = ~Q(
        classname="Article",
        article__my_container__ctype__in=["book-monograph", "book-lecture-notes"],
    )

    wanted = (is_article & not_monograph_part) | is_book
    resources = Resource.objects.filter(wanted).filter(sites__id=settings.SITE_ID).order_by("id")

    if pid:
        resources = resources.filter(pid=pid)

    if not params:
        return resources.all()

    resources, total = restrict_qs(resources, params)
    return resources.all(), total
def get_publishers_for_GALLICA(collection):
    """
    Return the publishers of the containers of a collection, with their dates.

    Containers are read in id order. Each run of consecutive containers with
    the same publisher name produces one entry
    {"name", "datedeb", "datefin"}; entries are inserted at the front, so the
    publisher seen last comes first. Containers without publisher are skipped.

    @param collection: Collection whose ``content`` is scanned
    @return: [] : list of publishers with dates
    """
    publishers = []

    for content_item in collection.content.all().order_by("id"):
        container = content_item.cast()

        # A year may look like YYYY-YYYY: issue holding articles of 2 years.
        year_parts = container.year.split("-")
        if len(year_parts) > 1:
            first_year, last_year = year_parts[0], year_parts[1]
        else:
            first_year = last_year = container.year

        publisher = container.my_publisher
        if not publisher:
            continue

        current = publishers[0] if publishers else None
        if current is None or publisher.pub_name != current["name"]:
            publishers.insert(
                0,
                {
                    "name": publisher.pub_name,
                    "datedeb": first_year,
                    "datefin": last_year,
                },
            )
            continue

        # Same publisher as the most recent entry: widen its date range.
        if first_year < current["datedeb"]:
            current["datedeb"] = first_year
        if last_year > current["datefin"]:
            current["datefin"] = last_year

    return publishers
def get_containers_for_GALLICA(collection):
    """
    Return the volumes of a journal (or the books of a collection), keyed by
    ``volume_int``:

        volumes[volume_int] = {'title': container title_tex,
                               'index': volume_int,
                               'vseries': vseries_int,
                               'fyear': int 'first year',
                               'lyear': int 'last year'}

    When several containers share a ``volume_int``, the year range is widened
    to cover all of them and title / vseries come from the last one read.

    @param collection: Collection whose ``content`` is scanned in id order
    @return: dict of volumes
    """
    # TODO: this is not an ordered list; use a list instead.
    volumes = {}

    for container in collection.content.all().order_by("id"):
        first, last = get_first_last_years(container.year)
        first = int(first)
        last = int(last)

        key = container.volume_int
        entry = volumes.get(key)
        if entry is None:
            volumes[key] = {
                "title": container.title_tex,
                "index": key,
                "vseries": container.vseries_int,
                "fyear": first,
                "lyear": last,
            }
            continue

        # The volume is already known: widen its years, refresh the rest.
        entry["fyear"] = min(entry["fyear"], first)
        entry["lyear"] = max(entry["lyear"], last)
        entry["title"] = container.title_tex
        entry["vseries"] = container.vseries_int
        entry["index"] = key

    return volumes
class OAIInfoVisitor:
    """
    Build the context dict of an OAI record, dispatching on the resource class.

    ``px`` is the metadata prefix the record is built for; it is compared with
    "eudml-article2" and "eudml-book2" to choose sets and extra information.
    """

    def __init__(self, px):
        self.px = px

    def visit(self, resource):
        """
        Call ``visit<classname>`` for ``resource`` and return its result.

        Raises AttributeError when no method exists for ``resource.classname``.
        """
        handler = getattr(self, "visit" + resource.classname)
        return handler(resource)

    def visitArticle(self, article):
        """Return {"item", "setspec"} for an article."""
        setspec = []
        # EuDML harvests Numdam metadata and expects a "NUMDAM" set
        if self.px == "eudml-article2" and settings.SITE_NAME == "numdam":
            setspec.append("NUMDAM")
        if self.px == "eudml-book2" and settings.SITE_NAME == "numdam":
            setspec.append("NUMDAM_book")

        setspec.append(article.my_container.my_collection.pid)

        return {"item": article, "setspec": setspec}

    def visitCollection(self, collection):
        """Return the item, volume information, url, icon and setspec of a collection."""
        context = get_volumes_in_collection(collection)
        issues_url = reverse("journal-issues", args=[collection.pid])
        return {
            "item": collection,
            "publishers": context["publishers"],
            "issues_in_vseries": context["sorted_issues"],
            "vols_number": context["volume_count"],
            "url": f"{settings.SITE_DOMAIN}{issues_url}",
            "icon": collection.icon(),
            "setspec": ["gallica"],
        }

    def visitContainer(self, container):
        """
        Return {"item", "setspec", "references"} for a container.

        "references" is True only for the "eudml-book2" prefix, when the
        container is a monograph or lecture notes and at least one of its
        articles (book-parts) has a bibliography item.
        """
        references = False
        if self.px == "eudml-book2" and container.ctype in [
            "book-monograph",
            "book-lecture-notes",
        ]:
            # any() stops at the first book-part with a bibliography instead of
            # counting the bibitems of every remaining part; an empty article
            # set yields False, so no preliminary count is needed either.
            references = any(
                article.bibitem_set.count() > 0 for article in container.article_set.all()
            )

        return {
            "item": container,
            "setspec": ["NUMDAM_book", container.my_collection.pid],
            "references": references,
        }
class OAIMetadataPrefixVisitor:
    """
    Give the list of metadata prefixes available for a resource, dispatching
    on the resource class. ``None`` is returned when the ctype is not handled.
    """

    # Prefixes of an article, keyed by the ctype of its container.
    _ARTICLE_PREFIXES = {
        "book-edited-book": ("oai_dc", "eudml_book2"),
        "issue": ("oai_dc", "eudml_article2"),
    }
    # Container ctypes that have their own record.
    _CONTAINER_CTYPES = ("book-monograph", "book-lecture-notes")

    def visit(self, resource):
        """Call ``visit<classname>`` for ``resource`` and return its result."""
        handler = getattr(self, "visit" + resource.classname)
        return handler(resource)

    def visitArticle(self, article):
        """Return the prefixes of an article according to its container ctype."""
        prefixes = self._ARTICLE_PREFIXES.get(article.my_container.ctype)
        if prefixes is None:
            return None
        return list(prefixes)

    def visitCollection(self, collection):
        """A collection is only available as oai_dc."""
        return ["oai_dc"]

    def visitContainer(self, container):
        """Return the prefixes of a monograph / lecture-notes container, else None."""
        if container.ctype in self._CONTAINER_CTYPES:
            return ["oai_dc", "eudml_book2"]
        return None