Coverage for src/ptf/model

1##################################################################################################

3# README

5# pivot classes for PTF objects (articles, issues,...).

6# They only have data members and are independent of XML, relational database and search engine.

7# They can be used directly by cmds

8# Parsers (JATS), Comparators,... inherit from these classes and add functions (parse_tree, compare)

10##################################################################################################

13# Simple object for compatibility reasons:

14# - in xml_cmds/add_relations

15# - in xml_cmds/find_collection (needs an incollection.collection.pid)

17from typing import TYPE_CHECKING

18from typing import Literal

19from typing import NotRequired

20from typing import TypedDict

21from typing import Unpack

23if TYPE_CHECKING: 23 ↛ 24line 23 didn't jump to line 24 because the condition on line 23 was never true

24 from ptf.models import Collection

class Foo:
    """Empty attribute container.

    The class defines no members of its own; callers attach whatever attributes
    they need to an instance.
    """

31class AbstractDict(TypedDict): # Inferred from usage

32 tag: Literal["abstract", "biblio", "avertissement", "note", "description", "intro", "toc"]

33 value_html: NotRequired[str]

34 value_xml: NotRequired[str]

35 value_tex: str

36 lang: str

class ResourceData:
    """Base data holder for the pivot objects defined in this module.

    The constructor only creates attributes with neutral defaults ("und", None,
    empty string or a fresh empty list). It accepts arbitrary positional and
    keyword arguments but does not read any of them.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()

        self.lang = "und"

        # Identifiers / location
        self.pid: str | None = None
        self.doi: str | None = None
        self.url: str | None = None

        # Title and translated title (strings are immutable, so chaining is safe)
        self.title_xml = self.title_tex = self.title_html = ""
        self.trans_lang = "und"
        self.trans_title_html = self.trans_title_tex = self.trans_title_xml = ""

        # Common to articles, books, book-parts
        self.abstracts: list[AbstractDict] = []
        self.bibitems: list[RefData] = []
        # TODO: Remove bibitem. This is used for solrCmds. solrCmds should use bibitems instead.
        self.bibitem = []
        self.awards = []
        self.relations = []

        # Other ids of the resource. Create ResourceId in the DB
        self.ids = []
        # ids given by an external source (MR, ZBL...). Create an ExtId in the DB
        self.extids: list[tuple[str, str]] = []
        # <ext-link> can contain MR/ZBL... In this case, ExtLink are not created, only ExtId
        self.ext_links: list[ExtLinkDict] = []
        self.streams = []
        self.related_objects = []

        self.counts = []

        # Contributors, keywords and subjects
        self.contributors: list[ContributorDict] = []
        self.kwds: list[SubjDict] = []
        self.kwd_groups = []
        self.subjs: list[str] = []
        self.subj_groups = []

        self.figures = []
        self.supplementary_materials = []

        # Funding statement and footnotes
        self.funding_statement_html = self.funding_statement_xml = ""
        self.footnotes_html = self.footnotes_xml = ""

        # Body in its various formats
        self.body_html = self.body_tex = self.body_xml = self.body = ""

# Found in col.xml.
# It is the main way to create a collection in PTF.
# The upload urls end up creating a MathdocPublicationData.
class MathdocPublicationData(ResourceData):
    """Collection-level data: ResourceData plus collection attributes."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.coltype: str | None = None
        self.pid: str | None = None
        # Print and electronic ISSN
        self.issn = self.e_issn = None
        self.wall = 0
        self.provider = None
        self.abbrev = ""

114

115

class PublisherData:
    """Data holder for a publisher: a name, a location and a list of ext_links.

    Constructor arguments are accepted but not used.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()

        self.name: str | None = None
        self.loc = ""

        # Note: add_publisher in xml_cmds tries to create ExtLinks for Publishers.
        # This is not possible in JATS, where <publisher> only has
        # <publisher-name> and <publisher-location>.
        # TODO: remove ext_links ?
        self.ext_links = []

126

127

# JournalData typically comes from a <journal-meta> inside a <journal-issue>.
# It is not the main way to create a Journal, but can be used to create a collection on the fly.
# In this case, it will get some attributes from its parent collection (ex: coltype).
class JournalData(ResourceData):
    """Journal-level data: ResourceData plus journal attributes."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # None is immutable, so one chained assignment creates all five attributes
        self.coltype = self.publisher = self.provider = self.issn = self.e_issn = None
        self.wall = 0
        self.abbrev = ""

142

143

class IssueData(ResourceData):
    """Issue-level data: ResourceData plus issue attributes and a list of articles.

    Iterating over an instance yields its articles in list order.
    """

    # Declared for type checkers; merged_year is not assigned by __init__.
    merged_year: str | None
    year: str | None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.journal: Collection | None = None
        self.publisher: PublisherData | None = None
        self.provider = None
        self.ctype = "issue"

        # Issue identification
        self.year = self.vseries = self.volume = self.number = ""

        # Dates are kept as ISO 8601 strings
        self.last_modified_iso_8601_date_str: str | None = None
        self.prod_deployed_date_iso_8601_date_str: str | None = None

        self.articles: list[ArticleData] = []
        self.with_online_first = False
        self.seq = 0

    def __iter__(self):
        yield from self.articles

167

168

class ArticleData(ResourceData):
    """Article-level data: ResourceData plus article attributes."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.atype = ""

        self.seq = 0

        # Pagination
        self.fpage = ""
        self.lpage = ""
        self.page_range = ""
        self.size = ""
        self.page_type = ""

        self.article_number = self.talk_number = ""

        self.elocation = ""
        self.history_dates = []
        # Dates are kept as ISO 8601 strings
        self.prod_deployed_date_iso_8601_date_str: str | None = None
        self.date_published_iso_8601_date_str: str | None = None

        self.pid = None

        # Conflict of interest
        self.coi_statement = ""

        # list of ArticleData, translation of the article by others
        self.translations: list[ArticleData] = []

193

194

class RefData(ResourceData):
    """Bibliographic reference data: ResourceData plus citation fields.

    ``lang`` is a required keyword-only argument; it replaces the "und" default
    set by ResourceData.
    """

    # TODO: remove lang ? It is not used by Bibitem.

    def __init__(self, *args, lang, **kwargs):
        super().__init__(*args, **kwargs)

        self.lang = lang
        self.user_id = ""

        # Label and citation in the various formats
        self.label = self.label_prefix = self.label_suffix = ""
        self.citation_xml = ""
        self.citation_html: str | None = None
        self.citation_tex: str | None = None

        self.type = "misc"

        # Publication details
        self.publisher_name = self.publisher_loc = self.institution = ""
        self.series = self.volume = self.issue = ""
        self.month = self.year = ""
        self.comment = self.annotation = ""

        # Pagination
        self.fpage = self.lpage = self.page_range = self.size = ""

        # TeX titles
        self.source_tex = self.article_title_tex = self.chapter_title_tex = ""

226

227

# Incollection found in books.
# Mainly used to find the book number in its collection.
class CollectionData(ResourceData):
    """Collection reference data: ResourceData plus collection attributes."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.coltype = self.issn = self.e_issn = None
        self.volume = self.vseries = ""
        self.seq = 0

240

241

class BookData(ResourceData):
    """Book-level data: ResourceData plus book attributes.

    ``ctype`` is always initialised to "book-Book".
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        book_type = "Book"
        self.ctype = f"book-{book_type}"

        # Front matter
        self.frontmatter_xml = None
        self.frontmatter_toc_html = None
        self.frontmatter_foreword_html = None

        self.incollection = []
        self.publisher = self.provider = None
        self.parts = []
        self.body = ""
        self.seq = 0

        self.last_modified_iso_8601_date_str = None
        self.prod_deployed_date_iso_8601_date_str = None

260

261

class BookPartData(ArticleData):
    """Book part data: ArticleData plus front matter and nested parts.

    Unlike its base classes, ``body`` is initialised to None rather than "".
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.atype = ""

        # Pagination, reset after ArticleData.__init__
        self.fpage = ""
        self.lpage = ""
        self.page_range = ""
        self.page_type = ""

        # Front matter
        self.frontmatter_xml = self.frontmatter_toc_html = self.frontmatter_foreword_html = None

        self.parts = []
        self.body = None

273

274

def create_refdata(lang="und", doi=None):
    """Create a RefData of type "unknown" with placeholder citation fields.

    Args:
        lang: language code given to the RefData (defaults to "und").
        doi: optional DOI. When given, it is stored in ``data.doi`` and also
            appended to ``data.extids`` as a ``("doi", doi)`` tuple.

    Returns:
        The new RefData instance.
    """
    data = RefData(lang=lang)
    data.type = "unknown"
    data.citation_html = ""
    data.citation_tex = ""
    # The closing tag must match the opening <mixed-citation>; the previous
    # "</mixed_citation>" made this placeholder ill-formed XML.
    data.citation_xml = '<label></label><mixed-citation xml:space="preserve"></mixed-citation>'

    if doi is not None:
        data.doi = doi
        data.extids.append(("doi", doi))

    return data

287

288

def create_articledata(doi: str | None = None):
    """Create an ArticleData, optionally with its DOI set.

    Args:
        doi: DOI to store in ``data.doi``; None leaves it unset.

    Returns:
        The new ArticleData instance.
    """
    data = ArticleData(doi=doi)
    # ResourceData.__init__ ignores its keyword arguments and sets doi to None,
    # so the DOI has to be assigned explicitly or it is silently lost.
    data.doi = doi
    return data

292

293

def create_issuedata():
    """Return a new IssueData with default values."""
    return IssueData()

297

298

def create_bookdata():
    """Return a new BookData with default values."""
    return BookData()

302

303

def create_publicationdata():
    """Return a new MathdocPublicationData with default values."""
    return MathdocPublicationData()

307

308

def create_collectiondata():
    """Return a new CollectionData with default values."""
    return CollectionData()

312

313

def create_publisherdata():
    """Return a new PublisherData with default values."""
    return PublisherData()

317

318

class ContributorDict(TypedDict, total=False):  # Inferred from usage
    """Typed dictionary describing a contributor; every key is optional."""

    # External identifiers
    orcid: str
    idref: str
    mid: str

    # Name parts
    first_name: str
    last_name: str
    prefix: str
    suffix: str
    email: str
    string_name: str

    # Addresses
    addresses: list[str]
    address_text: str

    # Role and flags
    role: Literal["author", "editor", ""]
    deceased_before_publication: bool
    equal_contrib: bool
    contrib_xml: str
    corresponding: bool
    seq: int

337

338

339def create_contributor(**kwargs: Unpack[ContributorDict]) -> ContributorDict:

340 default: ContributorDict = {

341 "orcid": "",

342 "idref": "",

343 "mid": "",

344 "first_name": "",

345 "last_name": "",

346 "prefix": "",

347 "suffix": "",

348 "email": "",

349 "string_name": "",

350 "addresses": [],

351 "address_text": "",

352 "role": "",

353 "deceased_before_publication": False,

354 "equal_contrib": False,

355 "contrib_xml": '<contrib content-type="author"><name><surname></surname><given-names></given-names></name></contrib>',

356 "corresponding": False,

357 "seq": 0,

358 }

359 return default | kwargs

360

361

class SubjDict(TypedDict, total=False):  # Inferred from usage
    """Typed dictionary with optional ``lang``, ``type`` and ``value`` string keys."""

    lang: str
    type: str
    value: str

366

367

368def create_subj(**kwargs: Unpack[SubjDict]) -> SubjDict:

369 defaults: SubjDict = {

370 "lang": "",

371 "type": "",

372 "value": "",

373 }

374 return defaults | kwargs

375

376

class ExtLinkDict(TypedDict, total=False):  # Inferred from usage
    """Typed dictionary describing an external link; every key is an optional string."""

    rel: str
    mimetype: str
    location: str
    base: str
    metadata: str

383

384

385def create_extlink(**kwargs: Unpack[ExtLinkDict]) -> ExtLinkDict:

386 defaults: ExtLinkDict = {"rel": "", "mimetype": "", "location": "", "base": "", "metadata": ""}

387 return defaults | kwargs

388

389

class DataStreamDict(TypedDict, total=False):  # Inferred from usage
    """Typed dictionary describing a data stream; every key is an optional string."""

    rel: str
    mimetype: str
    location: str
    base: str
    text: str

396

397

398def create_datastream(**kwargs: Unpack[DataStreamDict]) -> DataStreamDict:

399 defaults: DataStreamDict = {"rel": "", "mimetype": "", "location": "", "base": "", "text": ""}

400 return defaults | kwargs

401

402

def get_extlink(resource: ResourceData | None, rel):
    """Return the first ext_link of *resource* whose "rel" equals *rel*.

    Args:
        resource: object exposing an ``ext_links`` list of dictionaries, or None.
        rel: value compared against each link's "rel" entry.

    Returns:
        The first matching link dictionary. None when *resource* is None or
        when no link matches; links without a "rel" key never match.
    """
    if resource is None:
        return None

    # Stop at the first match instead of materialising every matching link
    return next(
        (extlink for extlink in resource.ext_links if "rel" in extlink and extlink["rel"] == rel),
        None,
    )

Coverage for src/ptf/model_data.py: 91%

260 statements