# Coverage for src/ptf/model_data.py: 91%
# 260 statements
# coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
##################################################################################################
#
# README
#
# Pivot classes for PTF objects (articles, issues,...).
# They only have data members and are independent of XML, relational database and search engine.
# They can be used directly by cmds.
# Parsers (JATS), Comparators,... inherit from these classes and add functions (parse_tree, compare)
#
##################################################################################################
# Simple object for compatibility reasons:
#  - in xml_cmds/add_relations
#  - in xml_cmds/find_collection (needs an incollection.collection.pid)
17from typing import TYPE_CHECKING
18from typing import Literal
19from typing import NotRequired
20from typing import TypedDict
21from typing import Unpack
23if TYPE_CHECKING: 23 ↛ 24line 23 didn't jump to line 24 because the condition on line 23 was never true
24 from ptf.models import Collection
class Foo:
    """Empty placeholder class; callers attach whatever attributes they need to an instance."""
31class AbstractDict(TypedDict): # Inferred from usage
32 tag: Literal["abstract", "biblio", "avertissement", "note", "description", "intro", "toc"]
33 value_html: NotRequired[str]
34 value_xml: NotRequired[str]
35 value_tex: str
36 lang: str
class ResourceData:
    """Base pivot object holding the data members shared by the other ``*Data`` classes.

    ``__init__`` only assigns default values. Positional and keyword arguments are accepted
    (so subclasses can forward theirs) but none of them is used to initialise an attribute.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()

        self.lang = "und"

        self.pid: str | None = None
        self.doi: str | None = None

        self.url: str | None = None

        self.title_xml = ""
        self.title_tex = ""
        self.title_html = ""
        self.trans_lang = "und"
        self.trans_title_html = ""
        self.trans_title_tex = ""
        self.trans_title_xml = ""

        # Common to articles, books, book-parts
        self.abstracts: list[AbstractDict] = []
        self.bibitems: list[RefData] = []
        # TODO: Remove bibitem. This is used for solrCmds. solrCmds should use bibitems instead.
        self.bibitem = []
        self.awards = []
        self.relations = []

        self.ids = []  # Other id of the resource. Create ResourceId in the DB
        # id given by an external source (MR, ZBL...). Create an ExtId in the DB
        self.extids: list[tuple[str, str]] = []

        # <ext-link> can contain MR/ZBL... In this case, ExtLink are not created, only ExtId
        self.ext_links: list[ExtLinkDict] = []
        self.streams = []
        self.related_objects = []

        self.counts = []

        self.contributors: list[ContributorDict] = []
        self.kwds: list[SubjDict] = []
        self.kwd_groups = []
        self.subjs: list[str] = []
        self.subj_groups = []

        self.figures = []
        self.supplementary_materials = []

        self.funding_statement_html = ""
        self.funding_statement_xml = ""
        self.footnotes_html = ""
        self.footnotes_xml = ""

        self.body_html = ""
        self.body_tex = ""
        self.body_xml = ""
        self.body = ""
# Found in col.xml
# It is the main way to create a collection in PTF
# The upload urls will end up creating MathdocPublicationData
class MathdocPublicationData(ResourceData):
    """Pivot data for a collection described in col.xml."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.coltype: str | None = None
        self.pid: str | None = None
        self.issn = None
        self.e_issn = None
        self.wall = 0
        self.provider = None
        self.abbrev = ""
class PublisherData:
    """Pivot data for a publisher: a name, a location and a list of external links.

    Unlike the other ``*Data`` classes it does not derive from ``ResourceData``; the
    constructor arguments are accepted but unused.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.name: str | None = None
        self.loc = ""
        # Note: add_publisher in xml_cmds tries to create ExtLinks for Publishers
        # But this is not possible in JATS:
        # <publisher> only has <publisher-name> and <publisher-location>
        # TODO: remove ext_links ?
        self.ext_links = []
# JournalData typically comes from a <journal-meta> inside a <journal-issue>
# It is not the main way to create a Journal, but can be used to create a collection on the fly.
# In this case, it will get some attributes from its parent collection (ex: coltype)
class JournalData(ResourceData):
    """Pivot data for a journal; adds collection-level fields to ``ResourceData``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.coltype = None
        self.publisher = None
        self.provider = None
        self.issn = None
        self.e_issn = None
        self.wall = 0
        self.abbrev = ""
class IssueData(ResourceData):
    """Pivot data for an issue. Iterating over an instance yields its articles."""

    # NOTE(review): merged_year is only declared here; __init__ never assigns it, so reading
    # it before another piece of code sets it raises AttributeError.
    merged_year: str | None
    year: str | None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.journal: Collection | None = None
        self.publisher: PublisherData | None = None
        self.provider = None
        self.ctype = "issue"
        self.year = ""
        self.vseries = ""
        self.volume = ""
        self.number = ""
        self.last_modified_iso_8601_date_str: str | None = None
        self.prod_deployed_date_iso_8601_date_str: str | None = None
        self.articles: list[ArticleData] = []
        self.with_online_first = False
        self.seq = 0

    def __iter__(self):
        """Iterate over ``self.articles`` in list order."""
        yield from self.articles
class ArticleData(ResourceData):
    """Pivot data for an article: paging, numbering, dates and translations."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.atype = ""

        self.seq = 0
        self.fpage = self.lpage = self.page_range = self.size = ""
        self.page_type = ""

        self.article_number = ""
        self.talk_number = ""

        self.elocation = ""
        self.history_dates = []
        self.prod_deployed_date_iso_8601_date_str: str | None = None
        self.date_published_iso_8601_date_str: str | None = None

        self.pid = None

        self.coi_statement = ""  # Conflict of interest

        # list of ArticleData, translation of the article by others
        self.translations: list[ArticleData] = []
class RefData(ResourceData):
    """Pivot data for one bibliographic reference.

    ``lang`` is a required keyword-only argument; the remaining arguments are forwarded to
    ``ResourceData``.
    """

    # TODO: remove lang ? It is not used by Bibitem.

    def __init__(self, *args, lang, **kwargs):
        super().__init__(*args, **kwargs)

        self.lang = lang
        self.user_id = ""
        self.label = ""
        self.label_prefix = self.label_suffix = ""
        self.citation_xml = ""
        self.citation_html: str | None = None
        self.citation_tex: str | None = None
        self.type = "misc"
        self.publisher_name = ""
        self.publisher_loc = ""
        self.institution = ""
        self.series = ""
        self.volume = ""
        self.issue = ""
        self.month = ""
        self.year = ""
        self.comment = ""
        self.annotation = ""
        self.fpage = ""
        self.lpage = ""
        self.page_range = ""
        self.size = ""
        self.source_tex = ""
        self.article_title_tex = ""
        self.chapter_title_tex = ""
# Incollection found in books
# Mainly used to find the book number in its collection
class CollectionData(ResourceData):
    """Pivot data for the collection a book belongs to."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.coltype = None
        self.issn = None
        self.e_issn = None
        self.volume = ""
        self.vseries = ""
        self.seq = 0
class BookData(ResourceData):
    """Pivot data for a book: front matter, parts, publisher and collection membership."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        book_type = "Book"
        self.ctype = "book-" + book_type
        self.frontmatter_xml = None
        self.frontmatter_toc_html = None
        self.frontmatter_foreword_html = None
        self.incollection = []
        self.publisher = None
        self.provider = None
        self.parts = []
        self.body = ""
        self.seq = 0

        self.last_modified_iso_8601_date_str = None
        self.prod_deployed_date_iso_8601_date_str = None
class BookPartData(ArticleData):
    """Pivot data for a part of a book; an ``ArticleData`` with front matter and sub-parts."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.atype = ""
        self.fpage = self.lpage = self.page_range = self.page_type = ""
        self.frontmatter_xml = None
        self.frontmatter_toc_html = None
        self.frontmatter_foreword_html = None
        self.parts = []
        # Overrides the "" default assigned by ResourceData.
        self.body = None
def create_refdata(lang="und", doi=None):
    """Build a ``RefData`` of type "unknown" with empty citation fields.

    :param lang: value stored in ``RefData.lang``.
    :param doi: when not None, stored in ``doi`` and appended to ``extids`` as ``("doi", doi)``.
    :return: the new ``RefData``.
    """
    data = RefData(lang=lang)
    data.type = "unknown"
    data.citation_html = ""
    data.citation_tex = ""
    # The closing tag must match the opening <mixed-citation>, otherwise the default
    # citation is not well-formed XML.
    data.citation_xml = '<label></label><mixed-citation xml:space="preserve"></mixed-citation>'

    if doi is not None:
        data.doi = doi
        data.extids.append(("doi", doi))

    return data
def create_articledata(doi: str | None = None):
    """Build an ``ArticleData`` whose ``doi`` attribute is set to *doi*.

    :param doi: DOI to store on the article, or None.
    :return: the new ``ArticleData``.
    """
    data = ArticleData(doi=doi)
    # ResourceData.__init__ ignores its keyword arguments, so the doi has to be
    # assigned explicitly or it would be silently dropped.
    data.doi = doi
    return data
def create_issuedata():
    """Return a new ``IssueData`` with default values."""
    return IssueData()
def create_bookdata():
    """Return a new ``BookData`` with default values."""
    return BookData()
def create_publicationdata():
    """Return a new ``MathdocPublicationData`` with default values."""
    return MathdocPublicationData()
def create_collectiondata():
    """Return a new ``CollectionData`` with default values."""
    return CollectionData()
def create_publisherdata():
    """Return a new ``PublisherData`` with default values."""
    return PublisherData()
class ContributorDict(TypedDict, total=False):  # Inferred from usage
    """Dict shape describing one contributor; every key is optional."""

    orcid: str
    idref: str
    mid: str
    first_name: str
    last_name: str
    prefix: str
    suffix: str
    email: str
    string_name: str
    addresses: list[str]
    address_text: str
    role: Literal["author", "editor", ""]
    deceased_before_publication: bool
    equal_contrib: bool
    contrib_xml: str
    corresponding: bool
    seq: int
def create_contributor(**kwargs: Unpack[ContributorDict]) -> ContributorDict:
    """Return a contributor dict with every key present.

    Keys passed as keyword arguments override the defaults below. The defaults are rebuilt
    on every call, so the ``addresses`` list is never shared between results.
    """
    default: ContributorDict = {
        "orcid": "",
        "idref": "",
        "mid": "",
        "first_name": "",
        "last_name": "",
        "prefix": "",
        "suffix": "",
        "email": "",
        "string_name": "",
        "addresses": [],
        "address_text": "",
        "role": "",
        "deceased_before_publication": False,
        "equal_contrib": False,
        "contrib_xml": '<contrib content-type="author"><name><surname></surname><given-names></given-names></name></contrib>',
        "corresponding": False,
        "seq": 0,
    }
    return default | kwargs
class SubjDict(TypedDict, total=False):  # Inferred from usage
    """Dict shape of one entry of ``ResourceData.kwds``; every key is optional."""

    lang: str
    type: str
    value: str
def create_subj(**kwargs: Unpack[SubjDict]) -> SubjDict:
    """Return a subject dict with ``lang``, ``type`` and ``value`` defaulting to ""."""
    defaults: SubjDict = {
        "lang": "",
        "type": "",
        "value": "",
    }
    # Caller-supplied keys win over the defaults.
    return defaults | kwargs
class ExtLinkDict(TypedDict, total=False):  # Inferred from usage
    """Dict shape of one entry of ``ResourceData.ext_links``; every key is optional."""

    rel: str
    mimetype: str
    location: str
    base: str
    metadata: str
def create_extlink(**kwargs: Unpack[ExtLinkDict]) -> ExtLinkDict:
    """Return an ext-link dict whose five keys default to ""; keyword arguments override them."""
    defaults: ExtLinkDict = {"rel": "", "mimetype": "", "location": "", "base": "", "metadata": ""}
    return defaults | kwargs
class DataStreamDict(TypedDict, total=False):  # Inferred from usage
    """Dict shape describing one data stream; every key is optional."""

    rel: str
    mimetype: str
    location: str
    base: str
    text: str
def create_datastream(**kwargs: Unpack[DataStreamDict]) -> DataStreamDict:
    """Return a data-stream dict whose five keys default to ""; keyword arguments override them."""
    defaults: DataStreamDict = {"rel": "", "mimetype": "", "location": "", "base": "", "text": ""}
    return defaults | kwargs
def get_extlink(resource: "ResourceData | None", rel) -> "ExtLinkDict | None":
    """Return the first ext-link of *resource* whose ``"rel"`` equals *rel*.

    :param resource: object exposing an ``ext_links`` list of dicts, or None.
    :param rel: value compared against each link's ``"rel"`` key.
    :return: the first matching dict in list order; None when *resource* is None or
        nothing matches. Links without a ``"rel"`` key never match.
    """
    if resource is None:
        return None

    # next() stops at the first match instead of building the list of all matches.
    return next(
        (extlink for extlink in resource.ext_links if "rel" in extlink and extlink["rel"] == rel),
        None,
    )