Coverage for src/ptf/model_data.py: 91%

260 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1################################################################################################## 

2# 

3# README 

4# 

5# pivot classes for PTF objects (articles, issues,...). 

6# They only have data members and are independent of XML, relational database and search engine. 

7# They can be used directly by cmds 

8# Parsers (JATS), Comparators,... inherit from these classes and add functions (parse_tree, compare) 

9# 

10################################################################################################## 

11 

12 

13# Simple object for compatibility reasons:

14# - in xml_cmds/add_relations 

15# - in xml_cmds/find_collection (needs an incollection.collection.pid) 

16 

17from typing import TYPE_CHECKING 

18from typing import Literal 

19from typing import NotRequired 

20from typing import TypedDict 

21from typing import Unpack 

22 

23if TYPE_CHECKING: 23 ↛ 24line 23 didn't jump to line 24 because the condition on line 23 was never true

24 from ptf.models import Collection 

25 

26 

class Foo:
    """Empty class with no members of its own.

    NOTE(review): presumably the "simple object for compatibility" mentioned
    in the module header comment (xml_cmds/add_relations and
    xml_cmds/find_collection) — confirm against those callers.
    """

29 

30 

31class AbstractDict(TypedDict): # Inferred from usage 

32 tag: Literal["abstract", "biblio", "avertissement", "note", "description", "intro", "toc"] 

33 value_html: NotRequired[str] 

34 value_xml: NotRequired[str] 

35 value_tex: str 

36 lang: str 

37 

38 

class ResourceData:
    """Data members shared by the PTF pivot objects (articles, issues, books, ...).

    The constructor only initialises attributes to their default values.
    Positional arguments are accepted and ignored. The only keyword argument
    that is honoured is ``doi``; every other keyword argument is ignored.

    Args:
        doi (keyword, optional): initial value of ``self.doi``. Defaults to None.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()

        self.lang = "und"

        self.pid: str | None = None
        # create_articledata() builds ArticleData(doi=...). The keyword has to be
        # read here, otherwise the DOI passed to the constructor is silently lost.
        self.doi: str | None = kwargs.get("doi")

        self.url: str | None = None

        # Title and translated title, in their XML / TeX / HTML forms
        self.title_xml = ""
        self.title_tex = ""
        self.title_html = ""
        self.trans_lang = "und"
        self.trans_title_html = ""
        self.trans_title_tex = ""
        self.trans_title_xml = ""

        # Common to articles, books, book-parts
        self.abstracts: list[AbstractDict] = []
        self.bibitems: list[RefData] = []
        # TODO: Remove bibitem. This is used for solrCmds. solrCmds should use bibitems instead.
        self.bibitem = []
        self.awards = []
        self.relations = []

        self.ids = []  # Other id of the resource. Create ResourceId in the DB
        # id given by an external source (MR, ZBL...). Create an ExtId in the DB
        self.extids: list[tuple[str, str]] = []

        # <ext-link> can contain MR/ZBL... In this case, ExtLink are not created, only ExtId
        self.ext_links: list[ExtLinkDict] = []
        self.streams = []
        self.related_objects = []

        self.counts = []

        self.contributors: list[ContributorDict] = []
        self.kwds: list[SubjDict] = []
        self.kwd_groups = []
        self.subjs: list[str] = []
        self.subj_groups = []

        self.figures = []
        self.supplementary_materials = []

        self.funding_statement_html = ""
        self.funding_statement_xml = ""
        self.footnotes_html = ""
        self.footnotes_xml = ""

        self.body_html = ""
        self.body_tex = ""
        self.body_xml = ""
        self.body = ""

98 

99 

100# Found in col.xml 

101# It is the main way to create a collection in PTF 

102# The upload URLs will end up creating MathdocPublicationData

class MathdocPublicationData(ResourceData):
    """ResourceData extended with collection-level fields (coltype, ISSNs, wall, ...)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.coltype: str | None = None
        # pid is already initialised by ResourceData; it is reset to None here as well.
        self.pid: str | None = None
        self.issn = self.e_issn = None
        self.wall = 0
        self.provider = None
        self.abbrev = ""

114 

115 

class PublisherData:
    """Plain data holder for a publisher: name, location and ext-links.

    All constructor arguments are accepted and ignored.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()

        self.name: str | None = None
        self.loc = ""

        # Note: add_publisher in xml_cmds tries to create ExtLinks for Publishers.
        # This is not possible in JATS, where <publisher> only has
        # <publisher-name> and <publisher-location>.
        # TODO: remove ext_links ?
        self.ext_links = list()

126 

127 

128# JournalData typically comes from a <journal-meta> inside a <journal-issue> 

129# It is not the main way to create a Journal, but can be used to create a collection on the fly. 

130# In this case, it will get some attributes from its parent collection (ex: coltype) 

class JournalData(ResourceData):
    """ResourceData extended with journal-level fields."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Everything unknown at construction time starts as None.
        self.coltype = self.publisher = self.provider = self.issn = self.e_issn = None
        self.wall = 0
        self.abbrev = ""

142 

143 

class IssueData(ResourceData):
    """ResourceData extended with issue-level fields and a list of articles.

    Iterating over an instance yields its articles in list order.
    """

    # NOTE(review): merged_year is only declared here; __init__ does not assign
    # it — confirm that callers set it before reading it.
    merged_year: str | None
    year: str | None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.journal: Collection | None = None
        self.publisher: PublisherData | None = None
        self.provider = None
        self.ctype = "issue"

        # Year / series / volume / number all start as empty strings.
        self.year = self.vseries = self.volume = self.number = ""

        self.last_modified_iso_8601_date_str: str | None = None
        self.prod_deployed_date_iso_8601_date_str: str | None = None

        self.articles: list[ArticleData] = []
        self.with_online_first = False
        self.seq = 0

    def __iter__(self):
        for article in self.articles:
            yield article

167 

168 

class ArticleData(ResourceData):
    """ResourceData extended with article-level fields (pages, dates, translations, ...)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.atype = ""
        self.seq = 0

        # Pagination
        self.fpage = ""
        self.lpage = ""
        self.page_range = ""
        self.size = ""
        self.page_type = ""

        self.article_number = ""
        self.talk_number = ""
        self.elocation = ""

        # Dates
        self.history_dates = []
        self.prod_deployed_date_iso_8601_date_str: str | None = None
        self.date_published_iso_8601_date_str: str | None = None

        self.pid = None

        # Conflict of interest
        self.coi_statement = ""

        # list of ArticleData, translation of the article by others
        self.translations: list[ArticleData] = []

193 

194 

class RefData(ResourceData):
    """ResourceData extended with the fields of a bibliographic reference.

    ``lang`` is a required keyword-only argument and is stored on the instance.
    """

    # TODO: remove lang ? It is not used by Bibitem.

    def __init__(self, *args, lang, **kwargs):
        super().__init__(*args, **kwargs)

        self.lang = lang

        self.user_id = self.label = ""
        self.label_prefix = ""
        self.label_suffix = ""

        # Citation in its XML / HTML / TeX forms
        self.citation_xml = ""
        self.citation_html: str | None = None
        self.citation_tex: str | None = None

        self.type = "misc"

        self.publisher_name = self.publisher_loc = self.institution = ""
        self.series = self.volume = self.issue = ""
        self.month = self.year = ""
        self.comment = self.annotation = ""
        self.fpage = self.lpage = self.page_range = self.size = ""
        self.source_tex = self.article_title_tex = self.chapter_title_tex = ""

226 

227 

228# Incollection found in books 

229# Mainly used to find the book number in its collection 

class CollectionData(ResourceData):
    """ResourceData extended with collection fields (coltype, ISSNs, volume, vseries, seq)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.coltype = self.issn = self.e_issn = None
        self.volume = self.vseries = ""
        self.seq = 0

240 

241 

class BookData(ResourceData):
    """ResourceData extended with book-level fields (front matter, parts, publisher, ...)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        kind = "Book"
        self.ctype = f"book-{kind}"

        # Front matter
        self.frontmatter_xml = None
        self.frontmatter_toc_html = None
        self.frontmatter_foreword_html = None

        self.incollection = []
        self.publisher = self.provider = None
        self.parts = []
        self.body = ""
        self.seq = 0

        self.last_modified_iso_8601_date_str = None
        self.prod_deployed_date_iso_8601_date_str = None

260 

261 

class BookPartData(ArticleData):
    """ArticleData extended with front matter and nested parts.

    ``body`` is reset to None here, overriding the value set by the parent
    constructor.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.atype = ""

        # Pagination fields are reset to empty strings.
        self.fpage = ""
        self.lpage = ""
        self.page_range = ""
        self.page_type = ""

        # Front matter
        self.frontmatter_xml = None
        self.frontmatter_toc_html = None
        self.frontmatter_foreword_html = None

        self.parts = []
        self.body = None

273 

274 

def create_refdata(lang="und", doi=None):
    """Create a RefData of type "unknown" with empty citation strings.

    Args:
        lang: language passed to the RefData constructor (default "und").
        doi: optional DOI. When it is not None it is stored in ``data.doi``
            and the tuple ``("doi", doi)`` is appended to ``data.extids``.

    Returns:
        The newly created RefData.
    """
    data = RefData(lang=lang)
    data.type = "unknown"
    data.citation_html = ""
    data.citation_tex = ""
    # The closing tag has to match the opening <mixed-citation> element;
    # "</mixed_citation>" (underscore) made this fragment malformed XML.
    data.citation_xml = '<label></label><mixed-citation xml:space="preserve"></mixed-citation>'

    if doi is not None:
        data.doi = doi
        data.extids.append(("doi", doi))

    return data

287 

288 

def create_articledata(doi: str | None = None):
    """Create an ArticleData whose ``doi`` attribute is set to *doi*.

    Args:
        doi: optional DOI of the article (default None).

    Returns:
        The newly created ArticleData.
    """
    data = ArticleData(doi=doi)
    # Assign explicitly so the DOI is set on the returned object regardless of
    # how the constructor treats keyword arguments.
    data.doi = doi
    return data

292 

293 

def create_issuedata():
    """Return a new IssueData with default values."""
    return IssueData()

297 

298 

def create_bookdata():
    """Return a new BookData with default values."""
    return BookData()

302 

303 

def create_publicationdata():
    """Return a new MathdocPublicationData with default values."""
    return MathdocPublicationData()

307 

308 

def create_collectiondata():
    """Return a new CollectionData with default values."""
    return CollectionData()

312 

313 

def create_publisherdata():
    """Return a new PublisherData with default values."""
    return PublisherData()

317 

318 

319class ContributorDict(TypedDict, total=False): # Inferred from usage 

320 orcid: str 

321 idref: str 

322 mid: str 

323 first_name: str 

324 last_name: str 

325 prefix: str 

326 suffix: str 

327 email: str 

328 string_name: str 

329 addresses: list[str] 

330 address_text: str 

331 role: Literal["author", "editor", ""] 

332 deceased_before_publication: bool 

333 equal_contrib: bool 

334 contrib_xml: str 

335 corresponding: bool 

336 seq: int 

337 

338 

339def create_contributor(**kwargs: Unpack[ContributorDict]) -> ContributorDict: 

340 default: ContributorDict = { 

341 "orcid": "", 

342 "idref": "", 

343 "mid": "", 

344 "first_name": "", 

345 "last_name": "", 

346 "prefix": "", 

347 "suffix": "", 

348 "email": "", 

349 "string_name": "", 

350 "addresses": [], 

351 "address_text": "", 

352 "role": "", 

353 "deceased_before_publication": False, 

354 "equal_contrib": False, 

355 "contrib_xml": '<contrib content-type="author"><name><surname></surname><given-names></given-names></name></contrib>', 

356 "corresponding": False, 

357 "seq": 0, 

358 } 

359 return default | kwargs 

360 

361 

362class SubjDict(TypedDict, total=False): # Inferred from usage 

363 lang: str 

364 type: str 

365 value: str 

366 

367 

368def create_subj(**kwargs: Unpack[SubjDict]) -> SubjDict: 

369 defaults: SubjDict = { 

370 "lang": "", 

371 "type": "", 

372 "value": "", 

373 } 

374 return defaults | kwargs 

375 

376 

377class ExtLinkDict(TypedDict, total=False): # Inferred from usage 

378 rel: str 

379 mimetype: str 

380 location: str 

381 base: str 

382 metadata: str 

383 

384 

385def create_extlink(**kwargs: Unpack[ExtLinkDict]) -> ExtLinkDict: 

386 defaults: ExtLinkDict = {"rel": "", "mimetype": "", "location": "", "base": "", "metadata": ""} 

387 return defaults | kwargs 

388 

389 

390class DataStreamDict(TypedDict, total=False): # Inferred from usage 

391 rel: str 

392 mimetype: str 

393 location: str 

394 base: str 

395 text: str 

396 

397 

398def create_datastream(**kwargs: Unpack[DataStreamDict]) -> DataStreamDict: 

399 defaults: DataStreamDict = {"rel": "", "mimetype": "", "location": "", "base": "", "text": ""} 

400 return defaults | kwargs 

401 

402 

def get_extlink(resource: "ResourceData | None", rel):
    """Return the first ext-link of *resource* whose ``"rel"`` entry equals *rel*.

    Args:
        resource: object exposing an ``ext_links`` iterable of dicts, or None.
        rel: value compared with each ext-link's ``"rel"`` entry.

    Returns:
        The first matching ext-link dict (the same object that is stored in
        ``resource.ext_links``), or None when *resource* is None or when no
        ext-link matches. Ext-links without a ``"rel"`` key never match.
    """
    if resource is None:
        return None

    # Stop at the first match instead of building the full list of matches.
    return next(
        (extlink for extlink in resource.ext_links if "rel" in extlink and extlink["rel"] == rel),
        None,
    )