Coverage for src/ptf/citedby.py: 68%
484 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1import concurrent.futures
2import html
3import re
4from collections import defaultdict
5from datetime import timedelta
6from difflib import SequenceMatcher
8import xmltodict
9from bs4 import BeautifulSoup
10from pylatexenc.latex2text import LatexNodes2Text
11from requests.exceptions import ConnectionError
12from requests.exceptions import Timeout
13from requests_cache import CachedSession
14from requests_cache import FileCache
16from django.conf import settings
18from ptf.bibtex import parse_bibtex
19from ptf.cmds.xml.xml_utils import normalise_span
20from ptf.model_data import RefData
21from ptf.model_data import create_contributor
22from ptf.model_data_converter import update_ref_data_for_jats
23from ptf.model_helpers import get_extid
24from ptf.models import BibItemId
25from ptf.models import get_names
26from ptf.utils import get_display_name
# Base URLs of the external services queried by this module.
ADS_URL = "https://api.adsabs.harvard.edu/v1/search"
ARXIV_URL = "https://export.arxiv.org/api/query"
CROSSREF_URL = "https://doi.crossref.org/servlet/getForwardLinks"
SEMANTIC_URL = "https://api.semanticscholar.org/v1/paper/"
ZBMATH_URL = "https://zbmath.org"

# Provider names: stored on every reference as `provider` and used as the
# keys of PRIORITY.
ADS = "NASA ADS"
CROSSREF = "Crossref"
SEMANTIC = "Semantic Scholar"
ZBMATH = "zbMATH"

# Base value for the `timeout` argument of outgoing requests; several calls
# pass half of it.
TIMEOUT = 4.0

# Rank of each provider when several of them report the same citation: in
# built_citations the entry with the higher value is kept.  Being a
# defaultdict(int), any provider not listed here ranks 0.
PRIORITY = defaultdict(int, {ZBMATH: 10, ADS: 9, CROSSREF: 8, SEMANTIC: 7})

# LaTeX-to-text converter applied to the citation text in built_citations.
LATEX_PARSER = LatexNodes2Text(math_mode="verbatim")
# Shared HTTP session with a file-based response cache.  The cache location,
# the User-Agent and the From header are read from the Django settings when
# they are defined (and non-empty), otherwise the literal fallbacks below are
# used.  Cached responses expire after 30 days.
session = CachedSession(
    backend=FileCache(
        getattr(settings, "REQUESTS_CACHE_LOCATION", None) or "/tmp/ptf_requests_cache",
        decode_content=False,
    ),
    headers={
        "User-Agent": getattr(settings, "REQUESTS_USER_AGENT", None) or "Mathdoc/1.0.0",
        "From": getattr(settings, "REQUESTS_EMAIL", None) or "accueil@listes.mathdoc.fr",
    },
    expire_after=timedelta(days=30),
)
def create_refdata(lang="und"):
    """Return a new ``RefData`` of type "misc" whose external ids are all ``None``.

    @param lang: language code forwarded to ``RefData`` ("und" by default)
    @return: the initialised ``RefData``
    """
    ref = RefData(lang=lang)
    ref.type = "misc"
    # No external identifier is known yet for a freshly created reference.
    for id_name in ("doi", "arxiv", "zbl", "semantic"):
        setattr(ref, id_name, None)
    return ref
def is_same_title(compare, titles, tol=0.90):
    """Tell whether ``compare`` is nearly identical to at least one of ``titles``.

    Titles are compared case-insensitively after removing every non-word
    character; two titles match when their ``SequenceMatcher`` ratio is
    strictly greater than ``tol``.

    @param compare: the title to look for
    @param titles: iterable of candidate titles
    @param tol: similarity threshold (exclusive)
    @return: True if any candidate matches, False otherwise
    """

    def squash(text):
        return re.sub(r"\W", "", text).lower()

    needle = squash(compare)
    return any(
        SequenceMatcher(None, needle, squash(candidate)).ratio() > tol for candidate in titles
    )
def get_zbmath_bibtex(params):
    """Return the concatenated BibTeX export of the documents zbMATH lists as citing.

    The zbMATH page matching ``params`` is fetched and parsed; for every link
    with an ``href`` found inside its ``div`` of class "citations", the
    corresponding ``/bibtexoutput`` page is downloaded.

    @param params: query parameters of the zbMATH search
    @return: all the BibTeX texts joined together, "" when there is no citation block
    """
    search = session.get(
        ZBMATH_URL, params=params, headers={"Content-Type": "text/html"}, timeout=0.5 * TIMEOUT
    )
    citing = BeautifulSoup(search.text, "html.parser").find("div", {"class": "citations"})
    if not citing:
        return ""

    chunks = []
    for link in citing.find_all("a", href=True):
        export_url = ZBMATH_URL + "/bibtexoutput" + link.get("href", "")
        export = session.get(
            export_url, headers={"Content-Type": "text/x-bibtex"}, timeout=0.5 * TIMEOUT
        )
        # Force the decoding before reading the text.
        export.encoding = "utf-8"
        chunks.append(export.text)
    return "".join(chunks)
def citedby_zbmath(metadata):
    """Return the parsed BibTeX entries of the documents citing ``metadata`` in zbMATH.

    The document is searched by its zbMATH id when ``metadata`` has a "zbl_id"
    key, by its DOI otherwise, with a title + authors clause as alternative.

    @param metadata: dict with "title", "authors", "doi" and optionally "zbl_id"
    @return: the result of ``parse_bibtex`` on the downloaded BibTeX
    """
    if "zbl_id" in metadata:
        identifier = "an:" + metadata["zbl_id"]
    else:
        identifier = "en:" + metadata["doi"]

    title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")
    authors = "&au:".join(metadata["authors"])
    query = f'{identifier}|(ti:"{title_tex}"&au:{authors})'
    return parse_bibtex(get_zbmath_bibtex({"q": query}))
def citedby_crossref(metadata):
    """Return the Crossref forward links (citing documents) of ``metadata["doi"]``.

    The credentials are read from ``settings.CROSSREF_USER`` and
    ``settings.CROSSREF_PWD``.

    @param metadata: dict with a "doi" key
    @return: list of "forward_link" items as parsed by xmltodict, [] when the
             answer is not a 200 or has an empty body
    """
    citations = []
    # Let the HTTP library build the query string: a DOI or a password holding
    # reserved characters ("#", "&", "<", ...) is then percent-encoded instead
    # of corrupting or truncating the URL.
    params = {
        "usr": settings.CROSSREF_USER,
        "pwd": settings.CROSSREF_PWD,
        "doi": metadata["doi"],
    }
    response = session.get(CROSSREF_URL, params=params, timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code == 200:
        data = xmltodict.parse(response.text)
        body = data["crossref_result"]["query_result"]["body"]
        if body:
            citations = body["forward_link"]

    # xmltodict yields a single mapping (not a list) when there is only one link.
    if not isinstance(citations, list):
        citations = [citations]
    return citations
def get_arxiv_id(metadata):
    """Return the arXiv id (without version) of the document, or None.

    arXiv is queried by DOI or by title with at most one result; the result is
    accepted only if its title is nearly identical to the requested one.

    @param metadata: dict with "title" and "doi" keys
    @return: the arXiv identifier stripped of its "vN" suffix, None if not found
    """
    arxiv_id = None
    title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")
    headers = {"Content-Type": "application/atom+xml"}
    query = "doi:" + metadata["doi"] + " OR (ti:" + f'"{title_tex}"' + ")"
    params = {"search_query": query, "max_results": 1}
    response = session.get(ARXIV_URL, params=params, headers=headers, timeout=0.5 * TIMEOUT)
    if response.status_code == 200:
        data = xmltodict.parse(response.text)
        if "entry" in data["feed"]:
            entry = data["feed"]["entry"]
            if is_same_title(title_tex, [entry["title"]]):
                identifier = entry["id"].split("arxiv.org/abs/")[-1]
                # Remove only a trailing version marker: cutting at the first
                # "v" would truncate ids such as "solv-int/9901001v1".
                arxiv_id = re.sub(r"v\d+$", "", identifier)
    return arxiv_id
def citedby_ads(metadata, by_doi=True, citedby=True):
    """Return the NASA ADS records citing (or cited by) the document.

    The document is identified in ADS by its arXiv id.  A first query fetches
    the list of related bibcodes, a second one ("bigquery") fetches the
    details of these bibcodes.

    @param metadata: dict with "title" and "doi" (``by_doi``) or "arxiv_id" keys
    @param by_doi: look the arXiv id up with ``get_arxiv_id`` instead of
                   reading ``metadata["arxiv_id"]``
    @param citedby: fetch the "citation" field when True, "reference" otherwise
    @return: list of ADS documents (dicts), [] when nothing is found
    """
    if by_doi:
        arxiv_id = get_arxiv_id(metadata)
    else:
        arxiv_id = metadata["arxiv_id"]
    if not arxiv_id:
        return []

    citations = []
    url = ADS_URL + "/query"
    headers = {"Authorization": f"Bearer:{settings.ADS_TOKEN}"}
    reference = "citation" if citedby else "reference"
    params = {"q": "identifier:" + arxiv_id, "fl": reference}
    response = session.get(url, headers=headers, params=params, timeout=0.5 * TIMEOUT)
    if response.status_code == 200:
        results = response.json().get("response", {}).get("docs")
        if results and isinstance(results, list) and reference in results[0]:
            url = ADS_URL + "/bigquery"
            bibcodes = "bibcode\n" + "\n".join(results[0][reference])
            filters = "abstract,author,bibcode,comment,doi,doctype,"
            filters += "eid,identifier,issue,keyword,orcid_pub,"
            filters += "page,page_count,page_range,pub,pub_raw,title,volume,year"
            params = {"q": "*:*", "fl": filters, "rows": 200}
            response = session.post(
                url, params=params, headers=headers, data=bibcodes, timeout=0.5 * TIMEOUT
            )
            response.encoding = "utf-8"
            if response.status_code == 200:
                # Always hand back a list, even if the answer has no "docs".
                citations = response.json().get("response", {}).get("docs") or []
    return citations
def citedby_semantic(metadata, citedby=True):
    """Return the Semantic Scholar citations (or references) of ``metadata["doi"]``.

    @param metadata: dict with a "doi" key
    @param citedby: read the "citations" field when True, "references" otherwise
    @return: list of Semantic Scholar items, [] for site 36 or a non-200 answer
    """
    if settings.SITE_ID == 36:  # all but PCJ
        return []

    field = "citations" if citedby else "references"
    response = session.get(SEMANTIC_URL + metadata["doi"], timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []
    return list(response.json()[field])
def set_contributors(ref, api_contributors, orcids=None):
    """Build the contributors of ``ref`` from the authors returned by its provider.

    The way a name is split depends on ``ref.provider``:
    Crossref gives "given_name" / "surname" keys, ADS and zbMATH give
    "Last, First" strings, Semantic Scholar gives a "name" key whose last
    word is taken as the last name.

    @param ref: reference object; its ``provider`` is read and its
                ``contributors`` attribute is set
    @param api_contributors: one author or a list of authors, in provider format
    @param orcids: optional list of ORCIDs, used only when it has exactly one
                   entry per contributor ("-" meaning no ORCID)
    """
    if not isinstance(api_contributors, list):
        api_contributors = [api_contributors]

    contributors = []
    for raw in api_contributors:
        given = family = ""
        provider = ref.provider
        if provider == CROSSREF:
            given, family = raw.get("given_name"), raw.get("surname")
        elif provider in [ADS, ZBMATH]:
            parts = raw.split(", ")
            family = parts[0]
            given = parts[1] if len(parts) > 1 else ""
        elif provider == SEMANTIC:
            words = raw["name"].split(" ")
            given = " ".join(words[:-1])
            family = words[-1]

        person = create_contributor()
        person["first_name"] = given.strip() if given else ""
        person["last_name"] = family.strip() if family else ""
        person["role"] = "author"
        contributors.append(person)

    if orcids and len(contributors) == len(orcids):
        for person, orcid in zip(contributors, orcids):
            person["orcid"] = "" if orcid == "-" else orcid
    ref.contributors = contributors
def ads_to_bibtex_type(doc_type):
    """Map an ADS ``doctype`` to a BibTeX entry type.

    "article" and "eprint" become "article", the BibTeX-like types listed
    below are kept as they are, anything else becomes "misc".
    """
    if doc_type in ("article", "eprint"):
        return "article"

    kept_as_is = (
        "book",
        "inbook",
        "inproceedings",
        "mastersthesis",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doc_type if doc_type in kept_as_is else "misc"
def crossref_to_bibtex_type(doc_type, item):
    """Map a Crossref citation kind to a BibTeX entry type.

    @param doc_type: "journal_cite", "conf_cite", "book_cite" or anything else
    @param item: the citation fields; the presence of "paper_title" or
                 "chapter_title" selects the "in..." variant
    @return: the BibTeX type, "misc" for an unknown ``doc_type``
    """
    if doc_type == "journal_cite":
        return "article"
    if doc_type == "conf_cite":
        return "inproceedings" if "paper_title" in item else "proceedings"
    if doc_type == "book_cite":
        return "inbook" if "chapter_title" in item else "book"
    return "misc"
def citedby_crossref_refs(citations):
    """Convert Crossref forward links into ``RefData`` objects.

    Each item of ``citations`` is modified: its "@doi" key is popped, then its
    remaining entry (citation kind -> fields) is popped as well.

    @param citations: list of forward links as returned by ``citedby_crossref``
    @return: list of ``RefData`` with provider set to Crossref
    """
    # (Crossref key, RefData attribute, ignore the value when it is empty).
    # Applied in this order, so a later line overrides an earlier one that
    # targets the same attribute.
    field_map = (
        ("journal_title", "source_tex", True),
        ("article_title", "article_title_tex", True),
        ("volume_title", "source_tex", False),  # book or proceedings title
        ("paper_title", "article_title_tex", True),  # inproceedings title
        ("chapter_title", "chapter_title_tex", True),  # incollection or inbook
        ("first_page", "fpage", False),
        ("last_page", "lpage", False),
        ("volume", "volume", False),
        ("issue", "issue", False),
        ("year", "year", True),
    )

    refdata = []
    for link in citations:
        link.pop("@doi")  # the interior orderdict remains
        if not link:
            continue
        doc_type, fields = link.popitem()

        ref = create_refdata()
        ref.provider = CROSSREF
        ref.type = crossref_to_bibtex_type(doc_type, fields)
        for key, attribute, skip_empty in field_map:
            if key not in fields:
                continue
            value = fields[key]
            if value or not skip_empty:
                setattr(ref, attribute, value)
        if "contributors" in fields and "contributor" in fields["contributors"]:
            set_contributors(ref, fields["contributors"]["contributor"])
        if "doi" in fields and fields["doi"]:
            ref.doi = fields["doi"]["#text"].lower()
        refdata.append(ref)
    return refdata
def citedby_zbmath_refs(citations):
    """Convert zbMATH citations (parsed BibTeX entries) with ``bibtex_to_refs``."""
    return bibtex_to_refs(citations)
def is_misc(doctype):
    """Return True when ``doctype`` is not one of the entry types listed below."""
    handled_types = (
        "article",
        "book",
        # "booklet",
        "conference",
        "inbook",
        "incollection",
        "inproceedings",
        # "manual",
        # "mastersthesis",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doctype not in handled_types
def bibtex_to_refs(bibitems):
    """Convert parsed BibTeX entries into ``RefData`` objects tagged as zbMATH.

    Each entry is a dict of BibTeX fields with at least a "doctype" key; the
    entry is modified: its "doctype" is replaced by "misc" when ``is_misc``
    says so.

    @param bibitems: iterable of BibTeX entries (dicts)
    @return: list of ``RefData``
    """
    refdata = []
    for item in bibitems:
        ref = create_refdata()
        setattr(ref, "provider", ZBMATH)
        item["doctype"] = "misc" if is_misc(item["doctype"]) else item["doctype"]
        setattr(ref, "type", item["doctype"])

        # Container title: the first available field wins.
        if "fjournal" in item:
            setattr(ref, "source_tex", item["fjournal"])
        elif "journal" in item:
            setattr(ref, "source_tex", item["journal"])
        elif "booktitle" in item:
            setattr(ref, "source_tex", item["booktitle"])
        elif "howpublished" in item:
            # Drop a " (YYYY)" year, with its optional final dot.
            howpublished = re.sub(r" \([0-9]{4}\)\.?", "", item["howpublished"])
            setattr(ref, "source_tex", howpublished)

        if "fseries" in item:
            setattr(ref, "series", item["fseries"])
        elif "series" in item:
            setattr(ref, "series", item["series"])

        # The attribute receiving the title depends on the entry type.
        if "title" in item:
            if item["doctype"] in ["article", "misc"]:
                setattr(ref, "article_title_tex", item["title"])
            elif item["doctype"] in ["incollection", "inproceedings", "inbook"]:
                setattr(ref, "chapter_title_tex", item["title"])
            else:
                setattr(ref, "source_tex", item["title"])
        if "url" in item and not ref.source_tex:
            setattr(ref, "source_tex", item["url"])

        if "pages" in item and item["pages"]:
            # Split on runs of separators and ignore empty parts, so that the
            # usual BibTeX range "12--34" gives exactly two parts (a split on
            # single characters gives three and the last page is lost).
            pages = [part for part in re.split(r"\W+", item["pages"]) if part]
            if pages:
                setattr(ref, "fpage", pages[0])
                if len(pages) == 2:
                    setattr(ref, "lpage", pages[1])

        if "volume" in item:
            setattr(ref, "volume", item["volume"])
        if "number" in item:
            setattr(ref, "issue", item["number"])
        if "issue" in item:  # takes precedence over "number"
            setattr(ref, "issue", item["issue"])
        if "note" in item:
            setattr(ref, "comment", item["note"])
        if "year" in item:
            setattr(ref, "year", item["year"])
        if "author" in item:
            set_contributors(ref, item["author"].split(" and "))

        if "publisher" in item:
            setattr(ref, "publisher_name", item["publisher"])
        elif "school" in item:
            setattr(ref, "publisher_name", item["school"])
        elif "institution" in item:
            setattr(ref, "publisher_name", item["institution"])
        if "address" in item:
            setattr(ref, "publisher_loc", item["address"])

        if "doi" in item and item["doi"]:
            setattr(ref, "doi", item["doi"].lower())
        if "zbmath" in item:
            setattr(ref, "zbl", item["zbmath"])
        if "zbl" in item:  # takes precedence over "zbmath"
            setattr(ref, "zbl", item["zbl"])
        refdata.append(ref)
    return refdata
def citedby_ads_refs(citations):
    """Convert NASA ADS documents into ``RefData`` objects.

    @param citations: list of ADS documents (dicts) with at least "bibcode"
                      and "doctype" keys
    @return: list of ``RefData`` with provider set to NASA ADS
    """
    refdata: list[RefData] = []
    for item in citations:
        ref = create_refdata()
        ref.provider = ADS
        ref.bibcode = item["bibcode"]
        ref.type = ads_to_bibtex_type(item["doctype"])

        if item.get("title"):
            ref.article_title_tex = item["title"][0]

        if "page_range" in item:
            bounds = item["page_range"].split("-")
            if len(bounds) == 2:
                ref.fpage, ref.lpage = bounds
        elif item.get("page") and item["page"][0].isdigit():
            ref.fpage = item["page"][0]
            if item.get("page_count"):
                # NOTE(review): the last page is page_count - 1 and does not
                # depend on the first page -- confirm this is intended.
                ref.lpage = str(item["page_count"] - 1)

        if item.get("year"):
            ref.year = item["year"]
        if item.get("author"):
            set_contributors(ref, item["author"], item.get("orcid_pub", []))
        if "issue" in item:
            ref.issue = item["issue"]
        if "volume" in item:
            ref.volume = item["volume"]
        if item.get("doi"):
            ref.doi = item["doi"][0].lower()

        if item.get("eid"):
            eid_parts = item["eid"].split("arXiv:")
            # The eid is used as an arXiv id only for arXiv publications.
            if "pub" in item and "arXiv" in item["pub"]:
                ref.arxiv = eid_parts[-1]
                ref.source_tex = "arXiv"

        if item.get("pub_raw") and ref.doi and not ref.arxiv:
            # Keep what precedes the volume in the raw publication string.
            found = re.match(r"(^.+)?[,.]( vol. | Volume )", item["pub_raw"])
            if found:
                ref.source_tex = found.group(1)
        elif "pub" in item and not ref.arxiv:
            ref.source_tex = item["pub"]

        if item.get("abstract"):
            ref.abstract = [item["abstract"]]
        refdata.append(ref)
    return refdata
def citedby_semantic_refs(citations):
    """Convert Semantic Scholar items into ``RefData`` objects.

    @param citations: list of Semantic Scholar papers (dicts)
    @return: list of ``RefData`` with provider set to Semantic Scholar
    """
    refdata = []
    for paper in citations:
        ref = create_refdata()
        ref.provider = SEMANTIC

        if "title" in paper:
            title = paper["title"]
            # Titles written in capitals only are toned down.
            if title.isupper():
                title = title.capitalize()
            ref.article_title_tex = title
        if paper.get("year"):
            ref.year = str(paper["year"])
        if paper.get("authors"):
            set_contributors(ref, paper["authors"])
        if paper.get("doi"):
            ref.doi = paper["doi"].lower()
        if paper.get("arxivId"):
            ref.arxiv = paper["arxivId"]
            ref.source_tex = "arXiv"
        if paper.get("venue"):
            # A venue replaces the "arXiv" source set just above.
            ref.source_tex = paper["venue"]
        if "paperId" in paper:
            ref.semantic = paper["paperId"]
        refdata.append(ref)
    return refdata
def get_extlinks(extids):
    """Return the HTML links of the external ids of a citation.

    Each link has the form `` | <a href="...">Label:value</a>`` where the href
    is given by ``BibItemId.get_href()``.  Ids of an unknown type are skipped.

    @param extids: iterable of (id_type, id_value) pairs
    @return: list of HTML strings, one per recognised id
    """
    label_prefixes = {
        "doi": "DOI:",
        "arxiv": "arXiv:",
        "zbl-item-id": "Zbl:",
        "semantic-scholar": "Semantic-scholar:",
    }

    extlinks = []
    for extid in extids:
        eid = BibItemId()
        eid.id_type, eid.id_value = extid
        prefix = label_prefixes.get(eid.id_type)
        if prefix is None:
            continue
        # The id comes from an external provider and may contain "<", ">" or
        # "&" (some DOIs do): escape it before inserting it as link text.
        # The href is left exactly as returned by get_href().
        label = html.escape(prefix + eid.id_value, quote=False)
        extlinks.append(f' | <a href="{eid.get_href()}">{label}</a>')
    return extlinks
def built_extlinks(ref):
    """Set ``ref.extids`` to the list of (id_type, id_value) pairs known for ``ref``.

    The DOI, arXiv and zbMATH ids are listed in this order when they are set.
    The Semantic Scholar id is used only when none of them is available.
    """
    candidates = (
        ("doi", ref.doi),
        ("arxiv", ref.arxiv),
        ("zbl-item-id", ref.zbl),
    )
    extids = [(id_type, value) for id_type, value in candidates if value]
    if not extids and getattr(ref, "semantic", False):
        extids.append(("semantic-scholar", ref.semantic))
    ref.extids = extids
def get_values_for_stats(refs):
    """
    Extract from each ref the values kept for the statistics.

    @param refs: dict of RefData.__dict__ (extended with "extlinks")
    @return: list of dicts with the keys "authors", "title",
             "publication_title", "year", "url" and "source"
    """
    stats = []
    for entry in refs.values():
        authors = [
            {
                "author": get_display_name(
                    person["prefix"],
                    person["first_name"],
                    person["last_name"],
                    person["suffix"],
                    person["string_name"],
                )
            }
            for person in entry.get("contributors")
            if person["role"] == "author"
        ]

        # The keys holding the titles depend on the type of the reference.
        title = entry[get_publication_title(entry, "title")]
        publication_title = entry[get_publication_title(entry, "publication_title")]

        # The url is the target of the first external link, if any.
        url = ""
        if entry["extlinks"]:
            found = re.search(r'href="(.+)">', entry["extlinks"][0])
            if found:
                url = found.group(1)

        stats.append(
            {
                "authors": authors,
                "title": title,
                "publication_title": publication_title,
                "year": entry["year"],
                "url": url,
                "source": entry["provider"],
            }
        )
    return stats
def get_publication_title(ref_item, category="title"):
    """Return the name of the ref field holding the requested kind of title.

    Any type containing "thesis" uses the "thesis" entry; a type without an
    entry in the table (or a missing type) falls back on "misc".

    @param ref_item: dict with a "type" key
    @param category: "title" or "publication_title"
    @return: the field name, None for an unknown ``category``
    """
    fields_by_type = {
        "incollection": {"title": "source_tex", "publication_title": "series"},
        "thesis": {"title": "source_tex", "publication_title": "series"},
        "article": {"title": "article_title_tex", "publication_title": "source_tex"},
        "book": {"title": "source_tex", "publication_title": "series"},
        "inbook": {"title": "chapter_title_tex", "publication_title": "series"},
        "misc": {"title": "article_title_tex", "publication_title": "source_tex"},
    }

    type_ = ref_item.get("type") or "misc"
    if "thesis" in type_:
        type_ = "thesis"
    elif type_ not in fields_by_type:
        # Fall back on "misc" for unlisted types only: doing it for every
        # non-thesis type would make the other entries of the table unreachable.
        type_ = "misc"
    return fields_by_type[type_].get(category)
def built_citations(data):
    """Merge, sort and deduplicate the references found by the providers.

    @param data: list of RefData coming from ``citations_to_refs``; they are
                 completed in place (doi / arxiv, extids, citation_html)
    @return: a tuple (list of HTML citations, comma separated provider names,
             list of values for the statistics)
    """
    # to match citations and add these ids when missing
    doi_arxiv = {ref.doi: ref.arxiv for ref in data if ref.doi and ref.arxiv}
    arxiv_doi = {v: k for k, v in doi_arxiv.items()}

    results = []
    for n, ref in enumerate(data):
        # Complete the missing id with the one found on another reference.
        if ref.arxiv and not ref.doi:
            setattr(ref, "doi", arxiv_doi.get(ref.arxiv))
        elif not ref.arxiv and ref.doi:
            setattr(ref, "arxiv", doi_arxiv.get(ref.doi))
        built_extlinks(ref)
        update_ref_data_for_jats(ref, n, with_label=False)
        ref.citation_html = html.unescape(ref.citation_html)
        results.append(vars(ref))

    # Most recent year first (references without a year come last), then by
    # source, volume, issue and first page.
    results.sort(
        key=lambda k: (
            -int(k["year"]) if k["year"] else 0,
            k["source_tex"],
            k["volume"],
            k["issue"],
            k["fpage"],
        ),
    )

    refs = {}
    # Titles of the references having at least one of arxiv / doi / zbl.
    titles = {
        item[get_publication_title(item)]
        for item in results
        if any((item["arxiv"], item["doi"], item["zbl"]))
    }

    for item in results:
        links = get_extlinks(item["extids"])
        level = PRIORITY[item["provider"]]
        citation = LATEX_PARSER.latex_to_text(item["citation_html"].replace("$$", "$"))
        ref = {"html": citation + "".join(links)}
        ref.update({"priority": level, "extlinks": links})
        ref.update(item)

        # One entry per identifier: the DOI is used first, then the zbMATH id,
        # then the arXiv id.  For DOI and arXiv ids, an existing entry is
        # replaced only by a provider of strictly higher priority.
        if item["doi"]:
            if item["doi"] not in refs or refs[item["doi"]]["priority"] < level:
                refs[item["doi"]] = ref
        elif item["zbl"]:
            refs[item["zbl"]] = ref
        elif item["arxiv"]:
            if item["arxiv"] not in refs or refs[item["arxiv"]]["priority"] < level:
                refs[item["arxiv"]] = ref
        # NOTE(review): this branch is only reached when doi, zbl and arxiv are
        # all empty, so `item["doi"] or item["arxiv"]` is always false here and
        # the branch can never run -- confirm the intended condition.
        elif item["semantic"] and (item["doi"] or item["arxiv"]):
            if not is_same_title(item[get_publication_title(item)], titles):
                refs[item["semantic"]] = ref

    sources = list({ref["provider"] for ref in refs.values()})
    sources = ", ".join(sorted(sources))
    citations_html = [citation["html"] for citation in refs.values()]
    citedby_for_stats = get_values_for_stats(refs)
    return citations_html, sources, citedby_for_stats
def citations_to_refs(provider, citations):
    """Convert the raw citations of ``provider`` with the matching converter.

    @param provider: one of the provider names (Crossref, zbMATH, NASA ADS,
                     Semantic Scholar)
    @param citations: raw citations as returned by this provider
    @return: list of RefData, None for an unknown provider
    """
    converters = {
        CROSSREF: citedby_crossref_refs,
        ZBMATH: citedby_zbmath_refs,
        ADS: citedby_ads_refs,
        SEMANTIC: citedby_semantic_refs,
    }
    convert = converters.get(provider)
    if convert is None:
        return None
    return convert(citations)
def get_citations(resource):
    """Returns documents that cite this doi and sources used for the research.

    Crossref, zbMATH and NASA ADS are queried in parallel; a provider whose
    request times out or cannot connect is ignored.  The result is the one of
    ``built_citations``.
    """
    authors = get_names(resource, "author")
    zbl_id = get_extid(resource, "zbl-item-id")
    preprint_id = get_extid(resource, "preprint")

    metadata = {
        "authors": authors,
        "doi": resource.doi,
        "preprint_id": preprint_id.id_value if preprint_id else "",
        "title": resource.title_tex,
    }
    if zbl_id and zbl_id.id_value:
        metadata["zbl_id"] = zbl_id.id_value

    fetchers = (
        (citedby_crossref, CROSSREF),
        (citedby_zbmath, ZBMATH),
        (citedby_ads, ADS),
    )
    found_by_provider = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_to_provider = {
            executor.submit(fetch, metadata): provider for fetch, provider in fetchers
        }
        for future in concurrent.futures.as_completed(future_to_provider):
            try:
                found = future.result()
            except (Timeout, ConnectionError):
                continue
            # Providers without any result are left out.
            if found:
                found_by_provider[future_to_provider[future]] = found

    citations = []
    for provider, cites in found_by_provider.items():
        citations.extend(citations_to_refs(provider, cites))

    return built_citations(citations)