Coverage for src/oai/repository.py: 94%
170 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1from django.conf import settings
3from ptf.model_helpers import get_collection
5from . import oai_helpers
6from .oai_helpers import OAIInfoVisitor
7from .oai_helpers import OAIMetadataPrefixVisitor
8from .oai_helpers import get_collections
9from .oai_helpers import get_oai_item
# Page size for paginated OAI list results, read once from the Django settings.
# It is used both as the query page size and as the cursor increment between pages.
MAX_RESULT_SIZE = settings.MAX_RESULT_SIZE
class Result:
    """
    Iterable wrapper around one page of OAI items.

    Iterating over a ``Result`` yields one rendering context per item; the
    context and the template name are derived from the item itself and from
    the metadata prefix given at construction time.
    """

    def __init__(
        self,
        setspec,
        results,
        cursor,
        restart,
        total,
        metaformat,
        fd,
        ud,
        repositoryIdentifier,
        header_only=False,
    ):
        """
        @param setspec: set specification of the request (may be empty or None)
        @param results: iterable of items; each must provide ``accept`` and ``classname``
        @param cursor: position of the first item of this page
        @param restart: cursor of the next page, falsy when this is the last page
        @param total: total number of items, all pages included
        @param metaformat: metadata prefix, stored as ``self.px``
        @param fd: "from" date of the request, echoed in the resumption token
        @param ud: "until" date of the request, echoed in the resumption token
        @param repositoryIdentifier: value copied into every yielded context
        @param header_only: stored on the instance; not read by this class
        """
        self.setspec = setspec
        self.results = results
        self.cursor = cursor
        self.restart = restart
        self.total = total
        self.px = metaformat
        self.fd = fd
        self.ud = ud
        self.repositoryIdentifier = repositoryIdentifier
        self.header_only = header_only

    def __iter__(self):
        """Yield, for each item, the visitor context completed with template and repository id."""
        for item in self.results:
            context = item.accept(OAIInfoVisitor(self.px))
            context["template"] = self.get_template(item)
            context["repositoryIdentifier"] = self.repositoryIdentifier
            yield context

    def get_template(self, object_):
        """
        Build the template path for an item.

        @param object_: item exposing a ``classname`` attribute
        @return: "oai/<lowercased classname>_<metadata prefix>.xml"
        """
        classname = object_.classname.lower()
        return "oai/" + classname + "_" + self.px + ".xml"

    def resumptionToken(self):
        """
        Return the ``<resumptionToken>`` XML fragment for this page.

        @return: "" when the whole list fits in a single page, an empty
                 (self-closed) token element on the last page of a multi-page
                 list, otherwise an element whose text is
                 "px:next_cursor:setspec:total:from:until".
        """
        # The protocol is peculiar about resumption tokens: the last page of a
        # list that was split must still carry an (empty) token element.
        if not self.restart:
            if self.cursor == 0:
                return ""
            return '<resumptionToken completeListSize="{}" cursor="{}"/>'.format(
                self.total, self.cursor
            )

        # A missing set is serialized as an empty field so that the token
        # always has the same six colon-separated fields.
        return (
            '<resumptionToken completeListSize="{}"'
            ' cursor="{}">{}:{}:{}:{}:{}:{}</resumptionToken>'
        ).format(
            self.total,
            self.cursor,
            self.px,
            self.restart,
            self.setspec or "",
            self.total,
            self.fd,
            self.ud,
        )
class OAIRepository:
    """
    Entry point for the OAI verbs: holds the request state (set, dates,
    metadata prefix, cursor) and builds the ``Result`` objects to render.
    """

    def __init__(self, base_url):
        self.base_url = base_url
        self.sets_descr = ""
        self.setspec = ""
        self.cursor = 0
        self.fromdate = ""
        self.untildate = ""
        self.px = None

    def Identify(self):
        """
        Describe the repository.

        This base implementation returns None. The other methods index the
        returned value with "repositoryIdentifier" (and "base_url"), so
        presumably subclasses override it to return a dict -- TODO confirm.
        """
        pass

    def setmetaDataFormat(self, fmt):
        """Store the requested metadata prefix."""
        self.px = fmt

    def setSet(self, setspec):
        """Store the requested set specification."""
        self.setspec = setspec

    def setfromDate(self, fd):
        """Store the "from" date of the request."""
        self.fromdate = fd

    def setuntilDate(self, ud):
        """Store the "until" date of the request."""
        self.untildate = ud

    def setresumptionToken(self, token):
        """
        Restore the request state from a resumption token.

        The token is the text produced by ``Result.resumptionToken``:
        "px:cursor:setspec:total:from:until".

        @param token: string
        @return: 1 when the token was accepted, 0 when it is malformed or its
                 cursor is outside [0, total)
        """
        try:
            px, raw_cursor, setspec, raw_total, fromdate, untildate = token.split(":")
            cursor = int(raw_cursor)
            total = int(raw_total)
        except ValueError:
            # Wrong number of fields or non-integer cursor/total: report a bad
            # token instead of letting the exception escape.
            # NOTE(review): a date or set containing ":" also ends up here,
            # since the token format has no escaping.
            return 0

        self.setspec = setspec
        if cursor < 0 or cursor >= total:
            return 0
        self.px = px
        self.cursor = cursor
        self.fromdate = fromdate
        self.untildate = untildate
        return 1

    def get(self, id_):
        """
        Build the Result holding the single item designated by an OAI identifier.

        @param id_: OAI identifier
        @return: Result
        """
        my_id = self.make_internal_id(id_)
        items = [get_oai_item(my_id)]
        return Result(
            None,
            items,
            "",
            "",
            "",
            self.px,
            "",
            "",
            repositoryIdentifier=self.Identify()["repositoryIdentifier"],
            header_only=False,
        )

    @staticmethod
    def has_set(setspec):
        """
        Tell whether a set exists.

        @return: True for the dedicated sets, otherwise whether
                 ``get_collection(setspec)`` returns something truthy
        """
        if setspec in ["NUMDAM", "NUMDAM_book", "gallica"]:
            return True
        return bool(get_collection(setspec))

    @staticmethod
    def listsets():
        """
        @return: tuple (collections returned by ``get_collections()``,
                 descriptions of the dedicated sets)
        """
        collections = get_collections()
        sets = (
            {"pid": "NUMDAM", "title": "NUMDAM in eudml-article2"},
            {"pid": "NUMDAM_book", "title": "NUMDAM_book in eudml - book2"},
            {"pid": "gallica", "title": "All Collections in oai_dc for Gallica BNF"},
        )
        return (collections, sets)

    @staticmethod
    def make_internal_id(id_):
        """
        Extract the internal id from an OAI identifier.

        @param id_: identifier such as "oai:numdam.org:JTNB_XXX"
        @return: the third colon-separated field, or None when the identifier
                 does not have exactly three fields
        """
        parts = id_.split(":")
        if len(parts) == 3:
            return parts[2]  # oai:numdam.org:JTNB_XXX
        return None

    def has_identifier(self, id_):
        """Tell whether an item can be found for the given OAI identifier."""
        my_id = self.make_internal_id(id_)
        return bool(get_oai_item(my_id))

    def get_items(self, px, setspec):
        """
        Return a list of items depending on the setspec and the prefix:
        cf Mathdoc / Services-Projets / Numdam / Documents d'étude et
        développement / Etudes techniques / OAI_sur_la_PTF_et_etat_des_lieux.md

        @param px: metadata prefix; ignored for the NUMDAM / NUMDAM_book sets,
                   which impose their own prefix
        @param setspec:
        @return: list of items with pagination AND total count of items without pagination
        """
        params = {
            "cursor": self.cursor,
            "page_size": MAX_RESULT_SIZE,
            "fromdate": self.fromdate,
            "untildate": self.untildate,
        }

        col = None
        if setspec in ["NUMDAM", "NUMDAM_book"]:
            transform = {"NUMDAM": "eudml-article2", "NUMDAM_book": "eudml-book2"}
            px = transform[setspec]
        elif setspec == "gallica":
            # This set lists collections, whatever the prefix and the date settings.
            return get_collections(params)
        elif setspec:
            col = get_collection(setspec)

        restrict_by_date_published = getattr(settings, "OAI_BY_DATE_PUBLISHED", False)
        if restrict_by_date_published:
            items, total = oai_helpers.get_articles_by_date_published(params, col)
        else:
            # One helper per prefix: get_items_oai_dc, get_items_eudml_article2, ...
            klass = "get_items_{}".format(px.replace("-", "_"))
            items, total = getattr(oai_helpers, klass)(params, col)

        return items, total

    def get_next_page(self, items, total):
        """
        @param items: items of the current page
        @param total: total count of items, all pages included
        @return: cursor of the next page, or 0 when the current page is the last one
        """
        count = len(items)
        if self.cursor + count >= total:
            return 0
        return self.cursor + MAX_RESULT_SIZE

    def _list_page(self, header_only):
        """Fetch the current page for the stored request state and wrap it in a Result."""
        items, total = self.get_items(self.px, self.setspec)
        restart = self.get_next_page(items, total)
        return Result(
            self.setspec,
            items,
            self.cursor,
            restart,
            total,
            self.px,
            self.fromdate,
            self.untildate,
            repositoryIdentifier=self.Identify()["repositoryIdentifier"],
            header_only=header_only,
        )

    def listids(self):
        """Return the current page as a Result flagged ``header_only=True``."""
        return self._list_page(header_only=True)

    def listrecs(self):
        """Return the current page as a Result flagged ``header_only=False``."""
        return self._list_page(header_only=False)

    @staticmethod
    def has_format(px, object_=None, setspec=None):
        """
        Tell whether the format is supported for the given set or object.

        @param px: metadata prefix
        @param object_: optional item exposing ``pid`` and ``classname``
        @param setspec: optional set specification
        @return: True or False
        """
        matrice = {
            "eudml-book2": ["NUMDAM_book"],
            "eudml-article2": ["NUMDAM"],
            "oai_dc": ["NUMDAM_book", "NUMDAM", "gallica"],
        }
        if px not in matrice:
            return False

        value = True
        if object_:  # look at the type of the object
            klass = "get_items_{}".format(px.replace("-", "_"))
            # NOTE(review): get_items() unpacks the same helpers as
            # (items, total) when they are called with (params, col); confirm
            # that the pid= call returns a plain sequence of items.
            items = getattr(oai_helpers, klass)(pid=object_.pid)
            if len(items) != 1:
                value = False
            # special case: the item is a collection
            if object_.classname == "Collection" and px == "oai_dc":
                value = True

        if setspec in matrice[px]:
            setspec = None  # dedicated set that does not stand for a collection
        if setspec:
            value = get_collection(setspec) is not None

        return value

    def listmetadataformats(self, identifier=None):
        """
        List the metadata formats of the repository or of one item.

        @param identifier: optional OAI identifier
        @return: list of dicts with the keys "prefix", "schema" and "namespace";
                 None when an identifier is given but no item or no format is
                 found for it
        """
        oai_dc = {
            "prefix": "oai_dc",
            "schema": "http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
            "namespace": "http://www.openarchives.org/OAI/2.0/oai_dc/",
        }
        eudml_article2 = {
            "prefix": "eudml-article2",
            "schema": "http://eudml.org/schema/2.0/eudml-article-2.0.xsd",
            "namespace": "http://jats.nlm.nih.gov",
        }
        eudml_book2 = {
            "prefix": "eudml-book2",
            "schema": "http://eudml.org/schema/2.0/eudml-book-2.0.xsd",
            "namespace": "http://eudml.org/schema/2.0/eudml-book",
        }

        if identifier is None:
            repository = self.Identify()
            if repository["repositoryIdentifier"] == "centre-mersenne.org":
                if repository["base_url"] != "proceedings.centre-mersenne.org/oai":
                    return [oai_dc, eudml_article2]
            return [oai_dc, eudml_article2, eudml_book2]

        # Explicit lookup table, keyed by the underscore spelling of the
        # prefix; the hyphenated spelling is normalized before the lookup.
        known_formats = {
            "oai_dc": oai_dc,
            "eudml_article2": eudml_article2,
            "eudml_book2": eudml_book2,
        }

        value = None
        pid = self.make_internal_id(identifier)
        item = get_oai_item(pid)

        if item:
            # The visitor decides which formats apply to this kind of item.
            formats = item.accept(OAIMetadataPrefixVisitor())
            if formats:
                value = [known_formats[px.replace("-", "_")] for px in formats]

        return value