Coverage for src/ptf/external/datacite.py: 0%

41 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1import requests 

2 

3from ptf import model_data 

4 

5 

def get_datacite_journal_url(publisher):
    """Build the DataCite API URL that lists the DOIs of a publisher.

    The publisher name is embedded as a quoted phrase (``%22...%22``) in the
    ``query=publisher:`` parameter, and a page size of 1000 is requested.

    Args:
        publisher: Publisher name to search for.

    Returns:
        The query URL as a string.
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import quote

    # Percent-encode every reserved character, not only spaces: a raw "&",
    # "#", "%" or "+" in the name would otherwise change the meaning of the
    # query string. Spaces are still encoded as "%20", as before.
    encoded_publisher = quote(publisher, safe="")

    return (
        "https://api.datacite.org/dois"
        f"?query=publisher:%22{encoded_publisher}%22&page[size]=1000"
    )

11 

12 

def get_datacite_doi_url(doi):
    """Return the DataCite API URL of the record for *doi*.

    The DOI is appended as-is to the ``/dois/`` endpoint; no escaping is
    applied.
    """
    endpoint = "https://api.datacite.org/dois"
    return f"{endpoint}/{doi}"

15 

16 

def get_datacite_articles_in_journal(publisher, what):
    """Return article data for the DOIs DataCite lists for *publisher*.

    Sends one GET request to the URL built by ``get_datacite_journal_url``
    and converts the ``attributes`` of each returned record with
    ``parse_datacide_data``. Records whose ``id`` is in the ignore list are
    skipped.

    Args:
        publisher: Publisher name used in the DataCite query.
        what: Passed unchanged to ``parse_datacide_data``; it selects which
            optional fields are filled in.

    Returns:
        A list with one article data object per retained record.

    Raises:
        requests.HTTPError: If DataCite answers with an error status.
    """
    # NOTE(review): a single request is made, so results beyond the page size
    # requested in get_datacite_journal_url are never fetched. No timeout is
    # passed to requests.get either; confirm both are acceptable.
    dois_to_ignore = {"10.15781/md28-ws10"}

    response = requests.get(get_datacite_journal_url(publisher))
    response.raise_for_status()
    # Decode the body as UTF-8, consistent with get_datacite_doi.
    response.encoding = "utf-8"
    data = response.json()

    article_datas = []
    for datacite_data in data["data"]:
        if datacite_data["id"] in dois_to_ignore:
            continue

        xarticle = parse_datacide_data(datacite_data["attributes"], what)
        if xarticle is not None:
            article_datas.append(xarticle)

    return article_datas

43 

44 

def get_datacite_doi(doi, what):
    """Fetch the DataCite record of one DOI and convert it to article data.

    Args:
        doi: DOI whose record is requested from the DataCite API.
        what: Passed unchanged to ``parse_datacide_data``; it selects which
            optional fields are filled in.

    Returns:
        The object produced by ``parse_datacide_data`` from the record's
        ``attributes``.

    Raises:
        requests.HTTPError: If DataCite answers with an error status.
    """
    response = requests.get(get_datacite_doi_url(doi))
    response.raise_for_status()

    # Force UTF-8 before the body is decoded as JSON.
    response.encoding = "utf-8"
    payload = response.json()
    attributes = payload["data"]["attributes"]

    return parse_datacide_data(attributes, what)

54 

55 

def parse_datacide_data(data, what):
    """Build an article data object from a DataCite ``attributes`` mapping.

    The DOI, a pid derived from it, and the ``https://doi.org/`` URL are
    always set. The remaining fields are filled in only when their key is
    found in *what*.

    Args:
        data: Mapping holding at least ``"doi"``, plus ``"publicationYear"``
            and ``"created"`` when the matching options are requested.
        what: Container tested with ``in`` for ``"year"``, ``"published"``
            and ``"primary_url"``.
            NOTE(review): presumably a list of field names; verify against
            callers.

    Returns:
        The populated object created by ``model_data.create_articledata()``.
    """
    xarticle = model_data.create_articledata()

    doi = data["doi"]
    doi_url = f"https://doi.org/{doi}"

    xarticle.doi = doi
    # The pid is the DOI with "/", "." and "-" each replaced by "_".
    xarticle.pid = doi.translate(str.maketrans("/.-", "___"))
    xarticle.url = doi_url

    if "year" in what:
        xarticle.year = data["publicationYear"]

    if "published" in what:
        xarticle.date_published_iso_8601_date_str = data["created"]

    if "primary_url" in what:
        # DataCite does not give the primary url, so the DOI url is used instead
        xarticle.url = doi_url

    return xarticle