Coverage for src/ptf/external/datacite.py: 0%
41 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
from urllib.parse import quote

import requests

from ptf import model_data
def get_datacite_journal_url(publisher):
    """Build the DataCite search URL that lists the DOIs of a publisher.

    The publisher name is percent-encoded and embedded as a quoted phrase
    (``publisher:"<name>"``) in the ``query`` parameter; the request asks
    for a page of 1000 records.

    Args:
        publisher: Publisher name as plain, not yet URL-encoded, text.

    Returns:
        The URL of the DataCite ``/dois`` endpoint for that publisher.
    """
    # Encode every reserved character, not only spaces: a raw "&", "#" or
    # "+" in the name would otherwise cut the query short or change it.
    encoded_publisher = quote(publisher, safe="")
    # Alternative kept for reference (unused): filter on the DOI prefix
    # with "?prefix={prefix}&fields[dois]=id,publisher..." instead.
    return f"https://api.datacite.org/dois?query=publisher:%22{encoded_publisher}%22&page[size]=1000"
def get_datacite_doi_url(doi):
    """Build the DataCite REST API URL of a single DOI record.

    Args:
        doi: The DOI as plain text, e.g. ``"10.15781/md28-ws10"``.

    Returns:
        The URL ``https://api.datacite.org/dois/<doi>`` with the DOI
        percent-encoded.
    """
    # Keep "/" (the prefix/suffix separator) readable, but escape characters
    # such as "#", "?" or spaces that would otherwise truncate or alter the
    # URL path.
    encoded_doi = quote(doi, safe="/")
    return f"https://api.datacite.org/dois/{encoded_doi}"
def get_datacite_articles_in_journal(publisher, what, timeout=60):
    """Fetch the DataCite records of a publisher and convert them to article data.

    A single GET request is sent to the URL built by
    get_datacite_journal_url(); no further result pages are followed, so at
    most one page of records is processed.

    Args:
        publisher: Publisher name, passed to get_datacite_journal_url().
        what: Forwarded unchanged to parse_datacide_data(); presumably a
            collection of field names to extract (confirm against callers).
        timeout: Seconds to wait for the DataCite server before giving up.

    Returns:
        A list with one article-data object for every returned record whose
        DOI is not in the ignore list.

    Raises:
        requests.HTTPError: If DataCite answers with an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = get_datacite_journal_url(publisher)
    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # DOIs deliberately skipped (the reason is not recorded in this module).
    dois_to_ignore = {"10.15781/md28-ws10"}

    article_datas = []
    for datacite_data in data["data"]:
        if datacite_data["id"] in dois_to_ignore:
            continue
        xarticle = parse_datacide_data(datacite_data["attributes"], what)
        if xarticle is not None:
            article_datas.append(xarticle)

    return article_datas
def get_datacite_doi(doi, what, timeout=60):
    """Fetch one DOI record from DataCite and convert it to article data.

    Args:
        doi: The DOI to look up, passed to get_datacite_doi_url().
        what: Forwarded unchanged to parse_datacide_data(); presumably a
            collection of field names to extract (confirm against callers).
        timeout: Seconds to wait for the DataCite server before giving up.

    Returns:
        The article-data object built by parse_datacide_data() from the
        ``data.attributes`` part of the JSON response.

    Raises:
        requests.HTTPError: If DataCite answers with an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = get_datacite_doi_url(doi)
    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Force UTF-8 decoding of the body before it is parsed as JSON.
    response.encoding = "utf-8"
    attributes = response.json()["data"]["attributes"]

    return parse_datacide_data(attributes, what)
def parse_datacide_data(data, what):
    """Populate a new article-data object from the attributes of a DataCite record.

    Args:
        data: Mapping of DataCite attributes. ``"doi"`` is always read;
            ``"publicationYear"`` and ``"created"`` are read only when the
            matching field is requested.
        what: Container tested with ``in`` for the names ``"year"``,
            ``"published"`` and ``"primary_url"`` to select optional fields.

    Returns:
        The object created by model_data.create_articledata(), with doi,
        pid and url always set and the requested optional fields filled in.
    """
    article = model_data.create_articledata()

    doi_value = data["doi"]
    article.doi = doi_value
    # The pid is the DOI with each of "/", "." and "-" turned into "_".
    article.pid = doi_value.translate(str.maketrans("/.-", "___"))
    doi_link = f"https://doi.org/{doi_value}"
    article.url = doi_link

    if "year" in what:
        article.year = data["publicationYear"]

    if "published" in what:
        article.date_published_iso_8601_date_str = data["created"]

    if "primary_url" in what:
        # DataCite does not give the primary url, so the DOI url is used instead
        article.url = doi_link

    return article