Coverage for src/ptf/model_data_converter.py

1##################################################################################################

3# README

5# Operations on the xml data objects

6# Django DB -> Data objects

8##################################################################################################

10import types

12from django.db.models import Q

14from ptf.cmds.xml.citation_html import get_citation_html

15from ptf.cmds.xml.jats.builder.issue import get_single_title_xml

16from ptf.cmds.xml.jats.builder.issue import get_title_xml

17from ptf.cmds.xml.xml_base import RefBase

18from ptf.cmds.xml.xml_utils import escape

19from ptf.cmds.xml.xml_utils import get_contrib_xml

20from ptf.model_data import ArticleData

21from ptf.model_data import BookData

22from ptf.model_data import BookPartData

23from ptf.model_data import Foo

24from ptf.model_data import IssueData

25from ptf.model_data import JournalData

26from ptf.model_data import MathdocPublicationData

27from ptf.model_data import PublisherData

28from ptf.model_data import RefData

29from ptf.model_data import create_contributor

def db_append_obj_with_location_to_list(resource_qs, data_list):
    """Append one plain dict per object of ``resource_qs`` to ``data_list``.

    Every dict holds ``rel``, ``mimetype``, ``location`` and ``base`` (the
    ``base`` attribute of the object's base, or an empty string when the
    object has no base).  ``metadata``, ``text`` and ``caption`` are copied
    too, but only for objects that expose them.

    ``data_list`` is extended in place; nothing is returned.
    """
    optional_fields = ("metadata", "text", "caption")
    absent = object()  # sentinel: distinguishes "no attribute" from a None value

    for located in resource_qs:
        entry = {
            "rel": located.rel,
            "mimetype": located.mimetype,
            "location": located.location,
        }
        entry["base"] = located.base.base if located.base else ""
        # 'seq': located.seq

        for field in optional_fields:
            value = getattr(located, field, absent)
            if value is not absent:
                entry[field] = value

        data_list.append(entry)

def db_to_contributors(qs):
    """Return a list of contributor dicts built from the contributions in ``qs``.

    ``qs`` must offer ``.all()``.  Each contribution is copied field by field
    into a fresh object returned by ``create_contributor()``; a falsy
    ``orcid`` or ``mid`` is stored as an empty string, and ``addresses`` is
    the list of ``address`` values of the contribution's
    ``contribaddress_set``.
    """
    result = []
    for contribution in qs.all():
        entry = create_contributor()

        for field in ("first_name", "last_name", "prefix", "suffix"):
            entry[field] = getattr(contribution, field)
        entry["orcid"] = contribution.orcid or ""
        for field in ("email", "string_name"):
            entry[field] = getattr(contribution, field)
        entry["mid"] = contribution.mid or ""
        entry["addresses"] = [
            row.address for row in contribution.contribaddress_set.all()
        ]
        for field in (
            "role",
            "deceased_before_publication",
            "equal_contrib",
            "corresponding",
            "contrib_xml",
        ):
            entry[field] = getattr(contribution, field)
        # entry["author_id"] = contribution.id

        result.append(entry)

    return result

def db_to_resource_data_common(resource, data_resource):
    """Copy the attributes shared by all resource kinds onto ``data_resource``.

    ``resource`` is the database object and ``data_resource`` the data object
    filled in place (nothing is returned).  Scalar fields are copied as is;
    related rows (ids, ext links, streams, supplementary materials, counts,
    contributors, keywords, subjects, abstracts, awards, relations) are
    converted to tuples, dicts or ``Foo`` objects.

    ``data_resource.related_objects`` and figures are intentionally NOT
    filled here: they are updated by the FullText import after the Cedrics
    import.  Only the "supplementary-material" and "review" related objects
    are exported, into ``data_resource.supplementary_materials``.
    """
    scalar_fields = (
        "pid",
        "doi",
        "lang",
        "title_xml",
        "title_tex",
        "title_html",
        "abbrev",
        "trans_lang",
        "trans_title_tex",
        "trans_title_html",
        "funding_statement_xml",
        "funding_statement_html",
        "footnotes_xml",
        "footnotes_html",
    )
    for field in scalar_fields:
        setattr(data_resource, field, getattr(resource, field))

    data_resource.ids = [
        (resource_id.id_type, resource_id.id_value)
        for resource_id in resource.resourceid_set.all()
    ]
    data_resource.extids = [
        (extid.id_type, extid.id_value) for extid in resource.extid_set.all()
    ]

    db_append_obj_with_location_to_list(resource.extlink_set.all(), data_resource.ext_links)
    db_append_obj_with_location_to_list(resource.datastream_set.all(), data_resource.streams)

    # Ignore related_objects and figures: they are updated by the FullText
    # import after the Cedrics import, so they must not be appended here.
    db_append_obj_with_location_to_list(
        resource.relatedobject_set.filter(Q(rel="supplementary-material") | Q(rel="review")),
        data_resource.supplementary_materials,
    )

    data_resource.counts = [
        (count.name, count.value) for count in resource.resourcecount_set.all()
    ]

    data_resource.contributors = db_to_contributors(resource.contributions)

    data_resource.kwds = [
        {"type": kwd.type, "lang": kwd.lang, "value": kwd.value}
        for kwd in resource.kwd_set.all()
    ]
    data_resource.subjs = [
        {"type": subj.type, "lang": subj.lang, "value": subj.value}
        for subj in resource.subj_set.all()
    ]

    data_resource.abstracts = [
        {
            "tag": abstract.tag,
            "lang": abstract.lang,
            "value_xml": abstract.value_xml,
            "value_tex": abstract.value_tex,
            "value_html": abstract.value_html,
        }
        for abstract in resource.abstract_set.all()
    ]

    data_resource.awards = [
        {"abbrev": award.abbrev, "award_id": award.award_id}
        for award in resource.award_set.all()
    ]

    # The resource is the subject of the relation: use the "left" label and
    # point at the object.
    for relation in resource.subject_of.all():
        obj = Foo()
        obj.rel_type = relation.rel_info.left
        obj.id_value = relation.object_pid
        data_resource.relations.append(obj)

    # The resource is the object of the relation: use the "right" label and
    # point at the subject.
    for relation in resource.object_of.all():
        obj = Foo()
        obj.rel_type = relation.rel_info.right
        obj.id_value = relation.subject_pid
        data_resource.relations.append(obj)

    # Only some resources expose ISSNs; copy them when present.
    if hasattr(resource, "issn"):
        data_resource.issn = resource.issn
    if hasattr(resource, "e_issn"):
        data_resource.e_issn = resource.e_issn

159

160

def db_to_publisher_data(publisher):
    """Return a ``PublisherData`` carrying the name and location of ``publisher``.

    ``ext_links`` is always set to an empty list.
    """
    publisher_data = PublisherData()
    publisher_data.name, publisher_data.loc = publisher.pub_name, publisher.pub_loc

    # TODO: ext_links ?
    publisher_data.ext_links = []

    return publisher_data

171

172

def db_to_publication_data(collection):
    """Return a ``MathdocPublicationData`` filled from ``collection``.

    The common resource fields are copied first, then ``coltype``, ``wall``,
    ``issn`` and ``e_issn``.
    """
    publication = MathdocPublicationData()
    db_to_resource_data_common(collection, publication)

    for field in ("coltype", "wall", "issn", "e_issn"):
        setattr(publication, field, getattr(collection, field))

    return publication

184

185

def db_to_journal_data(collection):
    """Return a ``JournalData`` holding only the common resource fields of ``collection``."""
    # Open questions kept from the original author:
    #  - a JournalData has no coltype ?
    #  - a JournalData has a publisher but it does not seem to be used
    #    anywhere; the publisher seems to belong to the issue/article and
    #    not to the journal.
    journal = JournalData()
    db_to_resource_data_common(collection, journal)
    return journal

196

197

def db_to_collection_data(collection):
    """Return a ``MathdocPublicationData`` describing ``collection``.

    Besides the common resource fields, ``coltype``, ``issn`` and ``e_issn``
    are copied.  ``vseries``, ``volume`` and ``seq`` are copied only when
    ``collection`` has them.
    """
    col_data = MathdocPublicationData()
    db_to_resource_data_common(collection, col_data)

    for field in ("coltype", "issn", "e_issn"):
        setattr(col_data, field, getattr(collection, field))

    # attributes used for CollectionMembership
    for field in ("vseries", "volume", "seq"):
        if hasattr(collection, field):
            setattr(col_data, field, getattr(collection, field))

    return col_data

216

217

def db_to_issue_data(container, articles=None):
    """Return an ``IssueData`` built from ``container`` and its articles.

    ``articles`` may be an already prefetched or filtered iterable; when it
    is falsy (``None`` or empty) every article of ``container.article_set``
    is converted instead.
    """
    issue = IssueData()
    db_to_resource_data_common(container, issue)

    for field in ("ctype", "year", "vseries", "volume", "number"):
        setattr(issue, field, getattr(container, field))

    if container.last_modified:
        issue.last_modified_iso_8601_date_str = container.last_modified.isoformat()
    else:
        issue.last_modified_iso_8601_date_str = ""

    if container.deployed_date():
        issue.prod_deployed_date_iso_8601_date_str = container.deployed_date().isoformat()
    else:
        issue.prod_deployed_date_iso_8601_date_str = ""

    issue.journal = db_to_journal_data(container.my_collection)
    issue.publisher = db_to_publisher_data(container.my_publisher)
    # NOTE(review): db_to_book_data stores container.provider itself, not
    # its name - confirm which of the two forms consumers expect.
    issue.provider = container.provider.name

    # a Container has a seq, but it is used only for the books collections

    # articles may have been prefetched / filtered before
    selected = articles if articles else container.article_set.all()
    for article in selected:
        issue.articles.append(db_to_article_data(article))

    return issue

252

253

def db_to_book_data(container):
    """Return a ``BookData`` built from ``container``.

    The result holds the common resource fields, the publisher, the
    collections the book belongs to (``incollection``: the main collection
    first, then every collection of ``my_other_collections``), the optional
    front matter, the body, the book parts and the bibliography.
    """
    data_book = BookData()

    db_to_resource_data_common(container, data_book)

    data_book.ctype = container.ctype
    data_book.year = container.year

    data_book.publisher = db_to_publisher_data(container.my_publisher)
    data_book.provider = container.provider

    data_col = db_to_collection_data(container.my_collection)
    # These attributes are required when adding a container to solr
    for required in ("vseries", "volume"):
        if not hasattr(data_col, required):
            setattr(data_col, required, 0)
    data_book.incollection.append(data_col)

    # Convert each secondary collection itself (the loop variable), not the
    # main collection over and over again.
    for collection in container.my_other_collections.all():
        data_book.incollection.append(db_to_collection_data(collection))

    frontmatter = getattr(container, "frontmatter", None)
    if frontmatter is not None:
        data_book.frontmatter_xml = frontmatter.value_xml
        data_book.frontmatter_toc_html = frontmatter.value_html
        data_book.frontmatter_foreword_html = frontmatter.foreword_html
    data_book.body = container.get_body()

    data_book.last_modified_iso_8601_date_str = (
        container.last_modified.isoformat() if container.last_modified else ""
    )
    data_book.prod_deployed_date_iso_8601_date_str = (
        container.deployed_date().isoformat() if container.deployed_date() else ""
    )

    for bookpart in container.article_set.all():
        data_book.parts.append(db_to_bookpart_data(bookpart))

    # Each reference is stored twice: as a data object and as its HTML form.
    for bibitem in container.bibitem_set.all():
        data_ref = db_to_ref_data(bibitem, data_book.lang)
        data_book.bibitems.append(data_ref)
        data_book.bibitem.append(data_ref.citation_html)

    return data_book

299

300

def db_to_article_data(article):
    """Return an ``ArticleData`` built from ``article``.

    Copies the common resource fields, paging and numbering information, the
    publication / deployment / history dates (ISO 8601 strings), the body in
    its various formats and the bibliography, then recursively converts
    every article of ``article.translations``.
    """
    result = ArticleData()
    db_to_resource_data_common(article, result)

    result.atype = article.atype
    result.seq = str(article.seq)

    for field in (
        "fpage",
        "lpage",
        "page_range",
        "page_type",
        "article_number",
        "talk_number",
        "elocation",
    ):
        setattr(result, field, getattr(article, field))
    result.coi_statement = article.coi_statement or ""

    if article.date_published:
        result.date_published_iso_8601_date_str = article.date_published.isoformat()
    else:
        result.date_published_iso_8601_date_str = ""

    # The deployment date is only looked up for articles attached to a container.
    if article.my_container and article.deployed_date():
        result.prod_deployed_date_iso_8601_date_str = article.deployed_date().isoformat()
    else:
        result.prod_deployed_date_iso_8601_date_str = ""

    # Only the history dates that are actually set are exported.
    candidate_dates = (
        ("received", article.date_received),
        ("revised", article.date_revised),
        ("accepted", article.date_accepted),
        ("online", article.date_online_first),
    )
    result.history_dates = [
        {"type": date_type, "date": value.isoformat()}
        for date_type, value in candidate_dates
        if value
    ]

    result.body = article.get_body()
    result.body_html = article.body_html
    result.body_tex = article.body_tex
    result.body_xml = article.body_xml

    # Each reference is stored twice: as a data object and as its HTML form.
    for bibitem in article.bibitem_set.all():
        ref_data = db_to_ref_data(bibitem, "und")
        result.bibitems.append(ref_data)
        result.bibitem.append(ref_data.citation_html)

    for translated in article.translations.all():
        result.translations.append(db_to_article_data(translated))

    return result

354

355

def db_to_bookpart_data(article):
    """Return a ``BookPartData`` built from ``article`` (a part of a book).

    Copies the common resource fields, the type and paging information, the
    optional front matter, the body and the bibliography.
    """
    part = BookPartData()
    db_to_resource_data_common(article, part)

    for field in ("atype", "fpage", "lpage", "page_range", "page_type"):
        setattr(part, field, getattr(article, field))

    frontmatter = getattr(article, "frontmatter", None)
    if frontmatter is not None:
        part.frontmatter_xml = frontmatter.value_xml
        part.frontmatter_toc_html = frontmatter.value_html
        part.frontmatter_foreword_html = frontmatter.foreword_html
    part.body = article.get_body()

    # Each reference is stored twice: as a data object and as its HTML form.
    for bibitem in article.bibitem_set.all():
        ref_data = db_to_ref_data(bibitem, part.lang)
        part.bibitems.append(ref_data)
        part.bibitem.append(ref_data.citation_html)

    return part

380

381

def db_to_ref_data(bibitem, lang):
    """Return a ``RefData`` (created with ``lang``) filled from ``bibitem``.

    All scalar bibliographic fields are copied unchanged; ``extids`` becomes
    a list of ``(id_type, id_value)`` tuples taken from
    ``bibitem.bibitemid_set`` and ``contributors`` is built from
    ``bibitem.contributions``.
    """
    copied_fields = (
        "type",
        "user_id",
        "label",
        "citation_xml",
        "citation_tex",
        "citation_html",
        "publisher_name",
        "publisher_loc",
        "article_title_tex",
        "chapter_title_tex",
        "institution",
        "series",
        "volume",
        "issue",
        "month",
        "year",
        "comment",
        "annotation",
        "fpage",
        "lpage",
        "page_range",
        "size",
        "source_tex",
    )

    ref_data = RefData(lang=lang)
    for field in copied_fields:
        setattr(ref_data, field, getattr(bibitem, field))

    ref_data.extids = [
        (item_id.id_type, item_id.id_value) for item_id in bibitem.bibitemid_set.all()
    ]

    ref_data.contributors = db_to_contributors(bibitem.contributions)

    return ref_data

419

420

def jats_from_ref_comment(ref):
    """Return the JATS ``<comment>`` element for ``ref.comment``.

    Returns an empty string when the comment is ``None``.  Otherwise the
    comment text is escaped and wrapped in
    ``<comment xml:space="preserve">``.  The first ``http://`` or
    ``https://`` URL found (it runs up to the next space or the end of the
    text) is turned into an ``<ext-link>``; the text before and after the
    URL, including the separating space, is preserved.
    """
    comment = getattr(ref, "comment")
    if comment is None:
        return ""

    # Use whichever scheme occurs first in the text.
    candidates = [
        position
        for position in (comment.find("http://"), comment.find("https://"))
        if position != -1
    ]

    if not candidates:
        body = escape(comment)
    else:
        start = min(candidates)
        end = comment.find(" ", start)
        if end == -1:
            end = len(comment)

        url = escape(comment[start:end])
        body = escape(comment[:start])
        body += f'<ext-link xlink:href="{url}">{url}</ext-link>'

        # Keep the space that follows the URL, otherwise the trailing text
        # ends up glued to the link.
        tail = comment[end:]
        if tail:
            body += escape(tail)

    return f'<comment xml:space="preserve">{body}</comment>'

449

450

def jats_from_ref_attr(
    ref,
    attr_name,
    jats_tag="",
    preserve=False,
    attr_type=None,
    attr_type_value="",
    convert_html_tag=False,
):
    """Return the JATS element for attribute ``attr_name`` of ``ref``.

    Returns an empty string when ``ref`` has no such attribute or when its
    value is falsy.

    :param jats_tag: element name; defaults to ``attr_name`` when empty.
    :param preserve: add ``xml:space="preserve"`` to the element.
    :param attr_type: name of an extra XML attribute (``None`` for none).
    :param attr_type_value: value of that extra XML attribute.
    :param convert_html_tag: build the content with ``get_single_title_xml``
        instead of ``escape``.  This is honoured for every combination of
        ``preserve`` / ``attr_type``.
    """
    if not hasattr(ref, attr_name):
        return ""

    attr = getattr(ref, attr_name)
    if not attr:
        return ""

    tag = jats_tag or attr_name
    value = get_single_title_xml(attr) if convert_html_tag else escape(attr)

    xml_attributes = ""
    if attr_type is not None:
        xml_attributes += f' {attr_type}="{attr_type_value}"'
    if preserve:
        xml_attributes += ' xml:space="preserve"'

    return f"<{tag}{xml_attributes}>{value}</{tag}>"

481

482

def jats_from_abstract(abstract_lang, article_lang, abstract):
    """Return the JATS element wrapping ``abstract.value_xml``.

    An abstract written in the article's language becomes ``<abstract>``;
    any other language yields ``<trans-abstract>``.  The ``xml:lang``
    attribute carries the abstract's language in both cases.
    """
    if abstract_lang != article_lang:
        tag, lang = "trans-abstract", abstract_lang
    else:
        tag, lang = "abstract", article_lang
    return f'<{tag} xml:lang="{lang}">{abstract.value_xml}</{tag}>'

488

489

def jats_from_ref(ref):
    """Return the concatenated JATS child elements describing ``ref``.

    The pieces are emitted in a fixed order: authors, article / chapter /
    source titles, editors, then series, volume, publisher, institution,
    year, issue, DOI, pages, size and finally the comment.  ``ref`` must
    provide ``get_authors()`` and ``get_editors()``, whose items expose a
    ``contrib_xml`` entry.
    """
    pieces = []

    authors = ref.get_authors()
    if authors is not None:
        pieces.append("".join(author["contrib_xml"] for author in authors))

    pieces.append(
        jats_from_ref_attr(
            ref, "article_title_tex", "article-title", preserve=True, convert_html_tag=True
        )
    )
    pieces.append(
        jats_from_ref_attr(ref, "chapter_title_tex", "chapter-title", convert_html_tag=True)
    )
    pieces.append(
        jats_from_ref_attr(ref, "source_tex", "source", preserve=True, convert_html_tag=True)
    )

    editors = ref.get_editors()
    if editors is not None:
        pieces.append("".join(editor["contrib_xml"] for editor in editors))

    pieces.append(jats_from_ref_attr(ref, "series", preserve=True))
    pieces.append(jats_from_ref_attr(ref, "volume"))
    pieces.append(jats_from_ref_attr(ref, "publisher_name", "publisher-name"))
    pieces.append(jats_from_ref_attr(ref, "publisher_loc", "publisher-loc"))
    pieces.append(jats_from_ref_attr(ref, "institution"))
    pieces.append(jats_from_ref_attr(ref, "year"))
    pieces.append(jats_from_ref_attr(ref, "issue"))
    pieces.append(
        jats_from_ref_attr(ref, "doi", "pub-id", attr_type="pub-id-type", attr_type_value="doi")
    )
    pieces.append(jats_from_ref_attr(ref, "fpage"))
    pieces.append(jats_from_ref_attr(ref, "lpage"))
    pieces.append(jats_from_ref_attr(ref, "size", "size"))
    pieces.append(jats_from_ref_comment(ref))

    return "".join(pieces)

522

523

def update_ref_data_for_jats(ref, i, with_label=True):
    """Fill in the derived citation fields of ``ref`` (modified in place).

    ``i`` is the number of the reference; it is used to build a ``[i]``
    label when the reference has none and ``with_label`` is true.
    Set with_label=False if you do not want a label in the citation_html
    (for example in the citedby).

    For references of type "unknown", ``citation_html`` and ``citation_xml``
    are derived from ``citation_tex`` when they are empty.  For any other
    type, ``citation_html`` / ``citation_tex`` are regenerated with
    ``get_citation_html`` and ``citation_xml`` is rebuilt as an
    ``<element-citation>``.  The ``contrib_xml`` of every contributor is
    regenerated in both cases.
    """

    # A non-empty ref.eid replaces the first "eid" entry of ref.extids.
    if hasattr(ref, "eid") and ref.eid is not None and ref.eid != "":
        eids = [item for item in ref.extids if item[0] == "eid"]
        if eids:
            ref.extids.remove(eids[0])
        ref.extids.append(("eid", ref.eid))

    label = ref.label
    if not label and with_label:
        label = f"[{i}]"
        ref.label = label

    if ref.type == "unknown":
        if not ref.citation_html:
            # Prefix the label unless the TeX citation already starts with it.
            if with_label and ref.citation_tex.find(label) != 0:
                ref.citation_html = f"{label} {ref.citation_tex}"
            else:
                ref.citation_html = ref.citation_tex

        if not ref.citation_xml:
            # The closing tag must match the opening <mixed-citation>.
            ref.citation_xml = (
                f"<label>{escape(ref.label)}</label>"
                f'<mixed-citation xml:space="preserve">{ref.citation_tex}</mixed-citation>'
            )
    else:
        ref.label = f"{label}" if with_label else ""
        # ref can be a Munch dictionary, or a RefData object.
        # Add RefBase member functions, like get_authors
        ref.get_authors = types.MethodType(RefBase.get_authors, ref)
        ref.get_editors = types.MethodType(RefBase.get_editors, ref)
        text = get_citation_html(ref)
        ref.citation_html = ref.citation_tex = text

    for contrib in ref.contributors:
        contrib["contrib_xml"] = get_contrib_xml(contrib, is_ref=True)

    if ref.type != "unknown":
        element_citation = jats_from_ref(ref)
        ref.citation_xml = (
            f"<label>{escape(ref.label)}</label>"
            f'<element-citation publication-type="{ref.type}">{element_citation}</element-citation>'
        )

566

567

def update_data_for_jats(data_article, create_author_if_empty=False, with_label=True):
    """Complete ``data_article`` in place with the derived fields needed for JATS.

    - empty HTML titles fall back to their TeX counterparts and an empty
      ``title_xml`` is rebuilt with ``get_title_xml``;
    - the ``contrib_xml`` of every contributor is regenerated;
    - a DOI that is not ``None`` is added to ``ids`` unless already listed;
    - with ``create_author_if_empty``, an article without contributors gets
      a single blank contributor with the "author" role;
    - every bibliography item is updated (numbered from 1) and the same
      processing is applied recursively to the translations.
    """
    data_article.title_html = data_article.title_html or data_article.title_tex
    data_article.trans_title_html = (
        data_article.trans_title_html or data_article.trans_title_tex
    )
    if not data_article.title_xml:
        data_article.title_xml = get_title_xml(
            data_article.title_tex, data_article.trans_title_tex, data_article.trans_lang
        )

    for contributor in data_article.contributors:
        contributor["contrib_xml"] = get_contrib_xml(contributor)

    if data_article.doi is not None:
        doi_entry = ("doi", data_article.doi)
        if doi_entry not in data_article.ids:
            data_article.ids.append(doi_entry)

    if create_author_if_empty and len(data_article.contributors) == 0:
        placeholder = create_contributor()
        placeholder["role"] = "author"
        placeholder["contrib_xml"] = get_contrib_xml(placeholder)
        data_article.contributors = [placeholder]

    position = 0
    for ref in data_article.bibitems:
        position += 1
        update_ref_data_for_jats(ref, position, with_label=with_label)

    for translation in data_article.translations:
        update_data_for_jats(translation, create_author_if_empty, with_label)

597

598

def convert_refdata_for_editor(ref):
    """Add to ``ref`` (in place) the flat fields read by the reference editor.

    - ``contribs_text``: one "last_name, first_name" line per contributor;
    - ``type`` becomes "unknown" when the article title, chapter title and
      source are all empty;
    - ``doi`` is the value of the last "doi" entry of ``extids`` (empty
      string when there is none);
    - ``eid`` is set only when ``extids`` has an "eid" entry.
    """
    lines = []
    for contrib in ref.contributors:
        lines.append("{}, {}".format(contrib["last_name"], contrib["first_name"]))
    ref.contribs_text = "\n".join(lines)

    if not (ref.article_title_tex or ref.chapter_title_tex or ref.source_tex):
        ref.type = "unknown"

    ref.doi = ""
    for extid in ref.extids:
        id_type = extid[0]
        if id_type == "doi":
            ref.doi = extid[1]
        elif id_type == "eid":
            ref.eid = extid[1]
    # URLs are in <comment>
    # ref.url = ''
    # for ext_link in ref.ext_links:
    #     if ext_link['link_type'] == '':
    #         ref.url = ext_link['location']

Coverage for src/ptf/model_data_converter.py: 58%

367 statements