Coverage for src/ptf/citedby.py: 68%

1import concurrent.futures

2import html

3import re

4from collections import defaultdict

5from datetime import timedelta

6from difflib import SequenceMatcher

8import xmltodict

9from bs4 import BeautifulSoup

10from pylatexenc.latex2text import LatexNodes2Text

11from requests.exceptions import ConnectionError

12from requests.exceptions import Timeout

13from requests_cache import CachedSession

14from requests_cache import FileCache

16from django.conf import settings

18from ptf.bibtex import parse_bibtex

19from ptf.cmds.xml.xml_utils import normalise_span

20from ptf.model_data import RefData

21from ptf.model_data import create_contributor

22from ptf.model_data_converter import update_ref_data_for_jats

23from ptf.model_helpers import get_extid

24from ptf.models import BibItemId

25from ptf.models import get_names

26from ptf.utils import get_display_name

# Endpoints of the external services queried by this module.
ADS_URL = "https://api.adsabs.harvard.edu/v1/search"
ARXIV_URL = "https://export.arxiv.org/api/query"
CROSSREF_URL = "https://doi.crossref.org/servlet/getForwardLinks"
SEMANTIC_URL = "https://api.semanticscholar.org/v1/paper/"
ZBMATH_URL = "https://zbmath.org"

# Provider labels stored on each reference and used as PRIORITY keys.
ADS = "NASA ADS"
CROSSREF = "Crossref"
SEMANTIC = "Semantic Scholar"
ZBMATH = "zbMATH"

# Base HTTP timeout in seconds; several requests use half of this value.
TIMEOUT = 4.0

# Rank of each provider when two of them return the same citation (higher
# wins); a provider that is not listed gets 0 through the defaultdict.
PRIORITY = defaultdict(
    int,
    {
        ZBMATH: 10,
        ADS: 9,
        CROSSREF: 8,
        SEMANTIC: 7,
    },
)

# LaTeX-to-text converter configured with math_mode="verbatim".
LATEX_PARSER = LatexNodes2Text(math_mode="verbatim")

# HTTP session shared by every request in this module. Responses are cached
# on disk for 30 days; the cache location and the identifying headers come
# from the Django settings when they are defined, with the defaults below
# otherwise.
session = CachedSession(
    backend=FileCache(
        getattr(settings, "REQUESTS_CACHE_LOCATION", None) or "/tmp/ptf_requests_cache",
        decode_content=False,
    ),
    headers={
        "User-Agent": getattr(settings, "REQUESTS_USER_AGENT", None) or "Mathdoc/1.0.0",
        "From": getattr(settings, "REQUESTS_EMAIL", None) or "accueil@listes.mathdoc.fr",
    },
    expire_after=timedelta(days=30),
)

def create_refdata(lang="und"):
    """Return a new ``RefData`` of type "misc" whose external ids are all unset.

    @param lang: language passed to the ``RefData`` constructor
    @return: RefData with ``doi``, ``arxiv``, ``zbl`` and ``semantic`` set to None
    """
    ref = RefData(lang=lang)
    ref.type = "misc"
    for id_name in ("doi", "arxiv", "zbl", "semantic"):
        setattr(ref, id_name, None)
    return ref

def is_same_title(compare, titles, tol=0.90):
    """Tell whether ``compare`` is (almost) equal to one of ``titles``.

    Titles are compared after removing every non-word character and
    lower-casing; two titles match when their ``SequenceMatcher`` ratio is
    strictly greater than ``tol``.

    Missing titles (``None``) and titles that are empty once normalised carry
    no information: they never match anything, instead of raising a
    ``TypeError`` or matching every other empty title.

    @param compare: title to look for (str or None)
    @param titles: iterable of candidate titles (str or None)
    @param tol: similarity threshold, exclusive
    @return: bool
    """

    def _normalise(title):
        return re.sub(r"\W", "", title).lower() if title else ""

    wanted = _normalise(compare)
    if not wanted:
        return False

    for title in titles:
        candidate = _normalise(title)
        if candidate and SequenceMatcher(None, wanted, candidate).ratio() > tol:
            return True
    return False

def get_zbmath_bibtex(params):
    """Return the concatenated BibTeX of the links found in a zbMATH page.

    The page is fetched from ``ZBMATH_URL`` with ``params``. Every link with
    an ``href`` found inside its ``<div class="citations">`` is then requested
    through the ``/bibtexoutput`` endpoint and the response texts are
    concatenated.

    @param params: query parameters of the zbMATH search
    @return: str, empty when the page has no "citations" block
    """
    timeout = 0.5 * TIMEOUT
    response = session.get(
        ZBMATH_URL, params=params, headers={"Content-Type": "text/html"}, timeout=timeout
    )
    soup = BeautifulSoup(response.text, "html.parser")
    results = soup.find("div", {"class": "citations"})
    if not results:
        return ""

    bibtex_headers = {"Content-Type": "text/x-bibtex"}
    chunks = []
    for ref in results.find_all("a", href=True):
        url = ZBMATH_URL + "/bibtexoutput" + ref.get("href", "")
        response = session.get(url, headers=bibtex_headers, timeout=timeout)
        if response.status_code != 200:
            # An error page is not BibTeX: keep it out of the text to parse.
            continue
        response.encoding = "utf-8"
        chunks.append(response.text)
    return "".join(chunks)

# Build a zbMATH search query from ``metadata``, fetch the matching BibTeX with
# get_zbmath_bibtex() and return whatever parse_bibtex() makes of it.
# The query is "an:" + metadata["zbl_id"] when that key is present, otherwise
# "en:" + metadata["doi"].
# NOTE(review): indentation was lost in this listing and the "zbl_id" branch was
# never exercised, so it cannot be told from here whether the title/author
# alternative built on lines 99-101 belongs to the ``else`` branch only or is
# applied to both queries - confirm against the real file before editing.
94def citedby_zbmath(metadata):

95 if "zbl_id" in metadata: 95 ↛ 96line 95 didn't jump to line 96 because the condition on line 95 was never true

96 params = {"q": "an:" + metadata["zbl_id"]}

97 else:

98 params = {"q": "en:" + metadata["doi"]}

# Non-breaking spaces of the normalised title are replaced by plain spaces.
99 title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")

100 authors = "&au:".join(metadata["authors"])

# Extends the query with '|(ti:"<title>"&au:<author>&au:<author>...)'.
101 params = {"q": params["q"] + "|(ti:" + f'"{title_tex}"' + "&au:" + authors + ")"}

102 text = get_zbmath_bibtex(params)

103 citations = parse_bibtex(text)

104 return citations

105

106

def citedby_crossref(metadata):
    """Return the Crossref forward links (citing documents) of a DOI.

    The credentials are read from ``settings.CROSSREF_USER`` and
    ``settings.CROSSREF_PWD``. They are sent together with the DOI as query
    parameters so that they get URL-encoded: a DOI may contain characters
    such as ``<``, ``>``, ``#`` or ``;`` that would break a hand-built URL.

    @param metadata: dict with at least a "doi" key
    @return: list of the ``forward_link`` items parsed by xmltodict, empty
        when the request does not answer 200 or the body is empty
    """
    params = {
        "usr": settings.CROSSREF_USER,
        "pwd": settings.CROSSREF_PWD,
        "doi": metadata["doi"],
    }
    response = session.get(CROSSREF_URL, params=params, timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []

    data = xmltodict.parse(response.text)
    body = data["crossref_result"]["query_result"]["body"]
    if not body:
        return []

    citations = body["forward_link"]
    # xmltodict yields a single dict, not a list, when there is only one link.
    return citations if isinstance(citations, list) else [citations]

123

124

def get_arxiv_id(metadata):
    """Look up the arXiv identifier of a document from its DOI or title.

    The arXiv API is asked for a single result matching the DOI or the
    title. The result is accepted only when its title is similar to the
    title of the document (see ``is_same_title``).

    @param metadata: dict with "doi" and "title" keys
    @return: the arXiv id without its version suffix, or None
    """
    title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")
    query = "doi:" + metadata["doi"] + " OR (ti:" + f'"{title_tex}"' + ")"
    response = session.get(
        ARXIV_URL,
        params={"search_query": query, "max_results": 1},
        headers={"Content-Type": "application/atom+xml"},
        timeout=0.5 * TIMEOUT,
    )
    if response.status_code != 200:
        return None

    feed = xmltodict.parse(response.text)["feed"]
    if "entry" not in feed:
        return None

    entry = feed["entry"]
    if not is_same_title(title_tex, [entry["title"]]):
        return None

    identifier = entry["id"].split("arxiv.org/abs/")[-1]
    # Only drop the trailing version ("v2"): splitting on the first "v" would
    # truncate old-style identifiers such as "solv-int/9901001v1".
    return re.sub(r"v\d+$", "", identifier)

140

141

def citedby_ads(metadata, by_doi=True, citedby=True):
    """Return the NASA ADS records citing (or cited by) a document.

    The document is identified in ADS by its arXiv id. A first query gets
    the bibcodes of its citations (or references), and a second one
    ("bigquery") fetches the details of those bibcodes.

    @param metadata: dict describing the document
    @param by_doi: when True the arXiv id is searched with ``get_arxiv_id``,
        otherwise it is read from ``metadata["arxiv_id"]``
    @param citedby: True for the citing documents, False for the references
    @return: list of ADS documents (dicts), empty when nothing is found
    """
    # .get(): a metadata dict without "arxiv_id" means "no id", not a crash.
    arxiv_id = get_arxiv_id(metadata) if by_doi else metadata.get("arxiv_id")
    if not arxiv_id:
        return []

    timeout = 0.5 * TIMEOUT
    headers = {"Authorization": f"Bearer:{settings.ADS_TOKEN}"}
    reference = "citation" if citedby else "reference"
    params = {"q": "identifier:" + arxiv_id, "fl": reference}
    response = session.get(ADS_URL + "/query", headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        return []

    results = response.json().get("response", {}).get("docs")
    if not (results and isinstance(results, list) and reference in results[0]):
        return []

    bibcodes = "bibcode\n" + "\n".join(results[0][reference])
    filters = (
        "abstract,author,bibcode,comment,doi,doctype,"
        "eid,identifier,issue,keyword,orcid_pub,"
        "page,page_count,page_range,pub,pub_raw,title,volume,year"
    )
    params = {"q": "*:*", "fl": filters, "rows": 200}
    # NOTE(review): the ADS documentation shows a "Content-Type: big-query/csv"
    # header for this endpoint - confirm whether it is needed here.
    response = session.post(
        ADS_URL + "/bigquery", params=params, headers=headers, data=bibcodes, timeout=timeout
    )
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []
    # "docs" may be missing: always hand a list back to the caller.
    return response.json().get("response", {}).get("docs") or []

172

173

def citedby_semantic(metadata, citedby=True):
    """Return the Semantic Scholar citations (or references) of a DOI.

    Nothing is requested for the site whose ``settings.SITE_ID`` is 36 (PCJ).

    @param metadata: dict with a "doi" key
    @param citedby: True for "citations", False for "references"
    @return: list of the items returned by the API, empty when the request
        does not answer 200
    """
    reference = "citations" if citedby else "references"
    if settings.SITE_ID == 36:  # all but PCJ
        return []

    response = session.get(SEMANTIC_URL + metadata["doi"], timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []
    return list(response.json()[reference])

183

184

def set_contributors(ref, api_contributors, orcids=None):
    """Convert the authors returned by a provider and store them on ``ref``.

    The way a name is split depends on ``ref.provider``:
    - Crossref: dict with "given_name" and "surname" keys
    - NASA ADS / zbMATH: "Last, First" string
    - Semantic Scholar: dict with a "name" key ("First Middle Last")

    @param ref: reference object; its ``contributors`` attribute is set
    @param api_contributors: one contributor or a list of contributors
    @param orcids: optional list of ORCID ids; used only when it has one
        entry per contributor ("-" meaning no ORCID)
    @return: None
    """
    if not isinstance(api_contributors, list):
        api_contributors = [api_contributors]

    contributors = []
    for api_contributor in api_contributors:
        first_name = last_name = ""
        if ref.provider == CROSSREF:
            first_name = api_contributor.get("given_name")
            last_name = api_contributor.get("surname")
        elif ref.provider in (ADS, ZBMATH):
            # str.split() with a separator always returns at least one item.
            parts = api_contributor.split(", ")
            last_name = parts[0]
            first_name = parts[1] if len(parts) > 1 else ""
        elif ref.provider == SEMANTIC:
            # A missing or null name gives an empty contributor, not a crash.
            name = api_contributor.get("name") or ""
            first_name, _, last_name = name.rpartition(" ")

        contributor = create_contributor()
        contributor["first_name"] = first_name.strip() if first_name else ""
        contributor["last_name"] = last_name.strip() if last_name else ""
        contributor["role"] = "author"
        contributors.append(contributor)

    if orcids and len(contributors) == len(orcids):
        for contributor, orcid in zip(contributors, orcids):
            contributor["orcid"] = orcid if orcid != "-" else ""
    ref.contributors = contributors

215

216

def ads_to_bibtex_type(doc_type):
    """Map a NASA ADS ``doctype`` to a BibTeX entry type.

    "article" and "eprint" become "article"; the types that have a BibTeX
    namesake are kept as they are; anything else becomes "misc".
    """
    if doc_type in ("article", "eprint"):
        return "article"

    same_name_types = (
        "book",
        "inbook",
        "inproceedings",
        "mastersthesis",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doc_type if doc_type in same_name_types else "misc"

233

234

def crossref_to_bibtex_type(doc_type, item):
    """Map a Crossref forward-link kind to a BibTeX entry type.

    @param doc_type: name of the citation element ("journal_cite",
        "conf_cite", "book_cite", ...)
    @param item: content of that element; the presence of "paper_title" or
        "chapter_title" tells a part from the whole volume
    @return: str, "misc" for an unknown ``doc_type``
    """
    if doc_type == "journal_cite":
        return "article"
    if doc_type == "conf_cite":
        return "inproceedings" if "paper_title" in item else "proceedings"
    if doc_type == "book_cite":
        return "inbook" if "chapter_title" in item else "book"
    return "misc"

251

252

def citedby_crossref_refs(citations):
    """Convert Crossref forward links into reference objects.

    Each forward link is a dict holding a "@doi" attribute and one citation
    element ("journal_cite", "conf_cite", "book_cite", ...). The links are
    only read, never modified, so the same list can be converted twice.

    @param citations: list of forward links as parsed by xmltodict
    @return: list of references built with ``create_refdata``
    """
    refdata = []
    for link in citations:
        # Skip the "@doi" attribute of the link; the citation element remains.
        children = [(key, value) for key, value in link.items() if key != "@doi"]
        if not children:
            continue
        doc_type, item = children[-1]
        if not item:
            # Empty citation element: nothing to convert.
            continue

        ref = create_refdata()
        ref.provider = CROSSREF
        ref.type = crossref_to_bibtex_type(doc_type, item)

        # The order matters: a later key overrides the attribute set by an
        # earlier one (volume_title over journal_title, paper_title over
        # article_title).
        if item.get("journal_title"):
            ref.source_tex = item["journal_title"]
        if item.get("article_title"):
            ref.article_title_tex = item["article_title"]
        if "volume_title" in item:  # book or proceedings title
            ref.source_tex = item["volume_title"]
        if item.get("paper_title"):  # inproceedings title
            ref.article_title_tex = item["paper_title"]
        if item.get("chapter_title"):  # incollection or inbook
            ref.chapter_title_tex = item["chapter_title"]
        if "first_page" in item:
            ref.fpage = item["first_page"]
        if "last_page" in item:
            ref.lpage = item["last_page"]
        if "volume" in item:
            ref.volume = item["volume"]
        if "issue" in item:
            ref.issue = item["issue"]
        if item.get("year"):
            ref.year = item["year"]

        # An empty <contributors/> element is parsed as None.
        contributors = item.get("contributors") or {}
        if "contributor" in contributors:
            set_contributors(ref, contributors["contributor"])

        doi = item.get("doi")
        if doi:
            # xmltodict gives a dict when the element has attributes and a
            # plain string otherwise.
            doi_text = doi["#text"] if isinstance(doi, dict) else doi
            ref.doi = doi_text.lower()
        refdata.append(ref)
    return refdata

289

290

def citedby_zbmath_refs(citations):
    """Convert the zbMATH citations; simply delegates to ``bibtex_to_refs``."""
    refs = bibtex_to_refs(citations)
    return refs

293

294

def is_misc(doctype):
    """Return True when ``doctype`` is not one of the entry types kept as is.

    "booklet", "manual" and "mastersthesis" are deliberately absent from the
    list, so they count as "misc" as well.
    """
    known_types = (
        "article",
        "book",
        "conference",
        "inbook",
        "incollection",
        "inproceedings",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doctype not in known_types

312

313

def bibtex_to_refs(bibitems):
    """Convert parsed BibTeX entries into reference objects (provider zbMATH).

    The entries are only read: the effective type ("misc" for the types
    rejected by ``is_misc``) is kept in a local variable instead of being
    written back into the entry.

    @param bibitems: list of dicts, one per BibTeX entry, with at least a
        "doctype" key
    @return: list of references built with ``create_refdata``
    """
    refdata = []
    for item in bibitems:
        ref = create_refdata()
        ref.provider = ZBMATH
        doctype = "misc" if is_misc(item["doctype"]) else item["doctype"]
        ref.type = doctype

        if "fjournal" in item:
            ref.source_tex = item["fjournal"]
        elif "journal" in item:
            ref.source_tex = item["journal"]
        elif "booktitle" in item:
            ref.source_tex = item["booktitle"]
        elif "howpublished" in item:
            # Drop the parenthesised year, e.g. "Preprint, arXiv:... (2020)".
            # The parentheses must be escaped: written with "$" the pattern
            # could never match.
            ref.source_tex = re.sub(r" \([0-9]{4}\)\.?", "", item["howpublished"])

        if "fseries" in item:
            ref.series = item["fseries"]
        elif "series" in item:
            ref.series = item["series"]

        if "title" in item:
            if doctype in ("article", "misc"):
                ref.article_title_tex = item["title"]
            elif doctype in ("incollection", "inproceedings", "inbook"):
                ref.chapter_title_tex = item["title"]
            else:
                ref.source_tex = item["title"]
        if "url" in item and not ref.source_tex:
            ref.source_tex = item["url"]

        if item.get("pages"):
            # Split on runs of separators so that "12--34" gives two parts,
            # like "12-34" does.
            pages = [part for part in re.split(r"\W+", item["pages"]) if part]
            if pages:
                ref.fpage = pages[0]
                if len(pages) == 2:
                    ref.lpage = pages[1]

        if "volume" in item:
            ref.volume = item["volume"]
        if "number" in item:
            ref.issue = item["number"]
        if "issue" in item:  # takes precedence over "number"
            ref.issue = item["issue"]
        if "note" in item:
            ref.comment = item["note"]
        if "year" in item:
            ref.year = item["year"]
        if "author" in item:
            set_contributors(ref, item["author"].split(" and "))

        if "publisher" in item:
            ref.publisher_name = item["publisher"]
        elif "school" in item:
            ref.publisher_name = item["school"]
        elif "institution" in item:
            ref.publisher_name = item["institution"]
        if "address" in item:
            ref.publisher_loc = item["address"]

        if item.get("doi"):
            ref.doi = item["doi"].lower()
        if "zbmath" in item:
            ref.zbl = item["zbmath"]
        if "zbl" in item:  # takes precedence over "zbmath"
            ref.zbl = item["zbl"]
        refdata.append(ref)
    return refdata

376

377

def citedby_ads_refs(citations):
    """Convert NASA ADS documents into reference objects.

    @param citations: list of ADS documents (dicts) with at least "bibcode"
        and "doctype" keys
    @return: list of references built with ``create_refdata``
    """
    refdata = []
    for item in citations:
        ref = create_refdata()
        ref.provider = ADS
        ref.bibcode = item["bibcode"]
        ref.type = ads_to_bibtex_type(item["doctype"])

        if item.get("title"):
            ref.article_title_tex = item["title"][0]

        if "page_range" in item:
            pages = item["page_range"].split("-")
            if len(pages) == 2:
                ref.fpage, ref.lpage = pages
        elif item.get("page") and item["page"][0].isdigit():
            first_page = item["page"][0]
            ref.fpage = first_page
            if item.get("page_count") and first_page.isdecimal():
                # Last page = first page + number of pages - 1.
                ref.lpage = str(int(first_page) + item["page_count"] - 1)

        if item.get("year"):
            ref.year = item["year"]
        if item.get("author"):
            set_contributors(ref, item["author"], item.get("orcid_pub", []))
        if "issue" in item:
            ref.issue = item["issue"]
        if "volume" in item:
            ref.volume = item["volume"]
        if item.get("doi"):
            ref.doi = item["doi"][0].lower()

        # The eid is used as arXiv id only when the publication is arXiv.
        if item.get("eid") and "pub" in item and "arXiv" in item["pub"]:
            ref.arxiv = item["eid"].split("arXiv:")[-1]
            ref.source_tex = "arXiv"

        if item.get("pub_raw") and ref.doi and not ref.arxiv:
            # Keep what precedes ", vol." / ". Volume" in the raw publication.
            found = re.match(r"(^.+)?[,.]( vol. | Volume )", item["pub_raw"])
            if found:
                ref.source_tex = found.group(1)
        elif "pub" in item and not ref.arxiv:
            ref.source_tex = item["pub"]

        if item.get("abstract"):
            ref.abstract = [item["abstract"]]
        refdata.append(ref)
    return refdata

421

422

def citedby_semantic_refs(citations):
    """Convert Semantic Scholar papers into reference objects.

    @param citations: list of papers (dicts) returned by the API
    @return: list of references built with ``create_refdata``
    """
    refdata = []
    for item in citations:
        ref = create_refdata()
        ref.provider = SEMANTIC

        if "title" in item:
            # A null title is treated as an empty one instead of crashing.
            title = item["title"] or ""
            # Titles written entirely in capitals are toned down.
            ref.article_title_tex = title.capitalize() if title.isupper() else title
        if item.get("year"):
            ref.year = str(item["year"])
        if item.get("authors"):
            set_contributors(ref, item["authors"])
        if item.get("doi"):
            ref.doi = item["doi"].lower()
        if item.get("arxivId"):
            ref.arxiv = item["arxivId"]
            ref.source_tex = "arXiv"
        if item.get("venue"):  # overrides the "arXiv" source set above
            ref.source_tex = item["venue"]
        if "paperId" in item:
            ref.semantic = item["paperId"]
        refdata.append(ref)
    return refdata

447

448

def get_extlinks(extids):
    """Build the HTML links of a list of external ids.

    @param extids: list of (id_type, id_value) tuples
    @return: list of ' | <a href="...">Label:value</a>' strings, one per id
        whose type is "doi", "arxiv", "zbl-item-id" or "semantic-scholar"
    """
    labels = {
        "doi": "DOI:",
        "arxiv": "arXiv:",
        "zbl-item-id": "Zbl:",
        "semantic-scholar": "Semantic-scholar:",
    }

    extlinks = []
    for extid in extids:
        eid = BibItemId()
        eid.id_type, eid.id_value = extid
        label = labels.get(eid.id_type)
        if label is None:
            continue
        text = label + eid.id_value
        extlinks.append(f' | <a href="{eid.get_href()}">{text}</a>')
    return extlinks

467

468

def built_extlinks(ref):
    """Set ``ref.extids`` to the list of (id_type, id_value) of the reference.

    DOI, arXiv and Zbl ids are listed in that order when they are set. The
    Semantic Scholar id is used only when none of them is available.
    """
    candidates = (
        ("doi", ref.doi),
        ("arxiv", ref.arxiv),
        ("zbl-item-id", ref.zbl),
    )
    extids = [(id_type, value) for id_type, value in candidates if value]
    if not extids and getattr(ref, "semantic", False):
        extids.append(("semantic-scholar", ref.semantic))
    ref.extids = extids

480

481

def get_values_for_stats(refs):
    """
    Extract the data of each ref and return them as a list of dicts.

    @param refs: dict of RefData.__dict__
    @return: list of dicts with the keys "authors", "title",
        "publication_title", "year", "url" and "source"
    """
    citedby_for_stats = []
    for ref_item in refs.values():
        authors = [
            {
                "author": get_display_name(
                    contributor["prefix"],
                    contributor["first_name"],
                    contributor["last_name"],
                    contributor["suffix"],
                    contributor["string_name"],
                )
            }
            for contributor in ref_item.get("contributors")
            if contributor["role"] == "author"
        ]

        title = ref_item[get_publication_title(ref_item, "title")]
        publication_title = ref_item[get_publication_title(ref_item, "publication_title")]

        # The url is the target of the first external link, if any.
        url = ""
        if ref_item["extlinks"]:
            found = re.search(r'href="(.+)">', ref_item["extlinks"][0])
            if found:
                url = found.group(1)

        citedby_for_stats.append(
            {
                "authors": authors,
                "title": title,
                "publication_title": publication_title,
                "year": ref_item["year"],
                "url": url,
                "source": ref_item["provider"],
            }
        )
    return citedby_for_stats

523

524

def get_publication_title(ref_item, category="title"):
    """Return the name of the field holding a title of the reference.

    @param ref_item: dict of a reference; only its "type" is read
    @param category: "title" for the title of the document itself,
        "publication_title" for the title of what it was published in
    @return: key to look up in ``ref_item``, None for an unknown category
    """
    dic = {
        "incollection": {"title": "source_tex", "publication_title": "series"},
        "thesis": {"title": "source_tex", "publication_title": "series"},
        "article": {"title": "article_title_tex", "publication_title": "source_tex"},
        "book": {"title": "source_tex", "publication_title": "series"},
        "inbook": {"title": "chapter_title_tex", "publication_title": "series"},
        "misc": {"title": "article_title_tex", "publication_title": "source_tex"},
    }

    # A reference without type is handled like any other non-thesis type
    # instead of raising a TypeError on the membership test below.
    type_ = ref_item.get("type") or ""

    # NOTE(review): every type but the theses is mapped to "misc", so the
    # "incollection", "article", "book" and "inbook" rows above are never
    # used. This behaviour is kept as it is - confirm whether it is intended.
    type_ = "thesis" if "thesis" in type_ else "misc"
    return dic[type_].get(category)

542

543

def built_citations(data):
    """Merge the references of all the providers into a list of citations.

    The references are completed (missing DOI / arXiv id taken from another
    reference that has both), formatted, sorted and de-duplicated on their
    DOI, Zbl id or arXiv id. When two providers return the same DOI or arXiv
    id, the one with the highest ``PRIORITY`` is kept.

    @param data: list of reference objects (see ``create_refdata``)
    @return: tuple (list of HTML citations, comma-separated provider names,
        list of dicts for the statistics)
    """
    # To match citations and add these ids when missing.
    doi_arxiv = {ref.doi: ref.arxiv for ref in data if ref.doi and ref.arxiv}
    arxiv_doi = {arxiv: doi for doi, arxiv in doi_arxiv.items()}

    results = []
    for n, ref in enumerate(data):
        if ref.arxiv and not ref.doi:
            ref.doi = arxiv_doi.get(ref.arxiv)
        elif ref.doi and not ref.arxiv:
            ref.arxiv = doi_arxiv.get(ref.doi)
        built_extlinks(ref)
        update_ref_data_for_jats(ref, n, with_label=False)
        ref.citation_html = html.unescape(ref.citation_html)
        results.append(vars(ref))

    def _sort_key(entry):
        # Most recent first. A missing or non-numeric year sorts as 0 and
        # missing fields as "" so that the tuples always stay comparable.
        try:
            year = -int(entry["year"]) if entry["year"] else 0
        except (TypeError, ValueError):
            year = 0
        return (
            year,
            entry["source_tex"] or "",
            entry["volume"] or "",
            entry["issue"] or "",
            entry["fpage"] or "",
        )

    results.sort(key=_sort_key)

    # Titles of the references that have a real identifier; a reference known
    # by Semantic Scholar only is dropped when its title is one of them.
    titles = set()
    for item in results:
        if any((item["arxiv"], item["doi"], item["zbl"])):
            title = item[get_publication_title(item)]
            if title:
                titles.add(title)

    refs = {}
    for item in results:
        links = get_extlinks(item["extids"])
        level = PRIORITY[item["provider"]]
        citation = LATEX_PARSER.latex_to_text(item["citation_html"].replace("$$", "$"))
        ref = {"html": citation + "".join(links)}
        ref.update({"priority": level, "extlinks": links})
        ref.update(item)

        if item["doi"]:
            if item["doi"] not in refs or refs[item["doi"]]["priority"] < level:
                refs[item["doi"]] = ref
        elif item["zbl"]:
            refs[item["zbl"]] = ref
        elif item["arxiv"]:
            if item["arxiv"] not in refs or refs[item["arxiv"]]["priority"] < level:
                refs[item["arxiv"]] = ref
        elif item["semantic"]:
            # Reached only without DOI, Zbl and arXiv ids: requiring one of
            # them here, as before, made this branch unreachable.
            title = item[get_publication_title(item)] or ""
            if not is_same_title(title, titles):
                refs[item["semantic"]] = ref

    sources = ", ".join(sorted({ref["provider"] for ref in refs.values()}))
    citations_html = [citation["html"] for citation in refs.values()]
    citedby_for_stats = get_values_for_stats(refs)
    return citations_html, sources, citedby_for_stats

602

603

def citations_to_refs(provider, citations):
    """Convert the raw citations of a provider into reference objects.

    @param provider: one of CROSSREF, ZBMATH, ADS, SEMANTIC
    @param citations: citations as returned by that provider
    @return: list of references; empty for an unknown provider, so that the
        result can always be passed to ``list.extend``
    """
    converters = {
        CROSSREF: citedby_crossref_refs,
        ZBMATH: citedby_zbmath_refs,
        ADS: citedby_ads_refs,
        SEMANTIC: citedby_semantic_refs,
    }
    converter = converters.get(provider)
    if converter is None:
        return []
    return converter(citations)

613

614

def get_citations(resource):
    """Returns documents that cite this doi and sources used for the research.

    Crossref, zbMATH and NASA ADS are queried in parallel; a provider that
    times out or cannot be reached is ignored.

    @param resource: object with ``doi`` and ``title_tex`` attributes, also
        passed to ``get_names`` and ``get_extid``
    @return: what ``built_citations`` returns for the references found
    """
    authors = get_names(resource, "author")
    zbl_id = get_extid(resource, "zbl-item-id")
    preprint_id = get_extid(resource, "preprint")

    metadata = {
        "authors": authors,
        "doi": resource.doi,
        "preprint_id": preprint_id.id_value if preprint_id else "",
        "title": resource.title_tex,
    }
    if zbl_id and zbl_id.id_value:
        metadata["zbl_id"] = zbl_id.id_value

    fetchers = (
        (citedby_crossref, CROSSREF),
        (citedby_zbmath, ZBMATH),
        (citedby_ads, ADS),
    )
    found_by_provider = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_to_provider = {
            executor.submit(fetcher, metadata): provider for fetcher, provider in fetchers
        }
        for future in concurrent.futures.as_completed(future_to_provider):
            try:
                found = future.result()
            except (Timeout, ConnectionError):
                continue
            if found:
                found_by_provider[future_to_provider[future]] = found

    citations = []
    for provider, cites in found_by_provider.items():
        citations.extend(citations_to_refs(provider, cites))

    return built_citations(citations)

652 return built_citations(citations)