Coverage for src/ptf/external/crossref.py: 0%

51 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1from datetime import timedelta 

2 

3from requests_cache import CachedSession 

4from requests_cache import FileCache 

5 

6from django.conf import settings 

7 

8from ptf.model_data import create_articledata 

9 

def _setting_or(name, fallback):
    """Return the Django setting ``name``, or ``fallback`` when it is missing or falsy."""
    return getattr(settings, name, None) or fallback


# Shared HTTP session for the Crossref requests below: responses are cached on disk
# and expire after 30 days.
session = CachedSession(
    backend=FileCache(
        _setting_or("REQUESTS_CACHE_LOCATION", "/tmp/ptf_requests_cache"),
        decode_content=False,
    ),
    headers={
        # Identification headers sent with every request made through this session
        "User-Agent": _setting_or("REQUESTS_USER_AGENT", "Mathdoc/1.0.0"),
        "From": _setting_or("REQUESTS_EMAIL", "accueil@listes.mathdoc.fr"),
    },
    expire_after=timedelta(days=30),
)

21 

22 

def get_crossref_journal_url(issn, rows=1000):
    """
    Build the Crossref REST API URL that lists the DOIs of the works of a journal.

    :param issn: ISSN of the journal. Hyphens are removed before it is put in the URL.
    :param rows: value of the ``rows`` query parameter (number of works requested).
                 Defaults to 1000, the value that used to be hard-coded here.
    :return: URL of the ``/journals/{issn}/works`` endpoint, selecting only the DOI
             field and sorted by ``published``
    """
    issn = issn.replace("-", "")
    return f"https://api.crossref.org/journals/{issn}/works?select=DOI&sort=published&rows={rows}"

26 

27 

def get_crossref_doi_url(doi):
    """
    Build the Crossref REST API URL of the work identified by a DOI.

    The DOI is percent-encoded before being inserted in the URL path: a DOI may
    contain characters such as "#" or "?" that would otherwise be read as the
    start of a fragment or of a query string. "/" is kept as is, so usual DOIs
    produce exactly the same URL as before.

    :param doi: DOI of the resource (raw, not already percent-encoded)
    :return: URL of the ``/works/{doi}`` endpoint
    """
    # Local import: keeps this function self-contained without touching the file's imports
    from urllib.parse import quote

    encoded_doi = quote(doi, safe="/")
    return f"https://api.crossref.org/works/{encoded_doi}"

30 

31 

def get_crossref_articles_in_journal(issn, what):
    """
    Fetch the works Crossref lists for a journal and request each of them by DOI.

    :param issn: ISSN of the journal
    :param what: list of metadata to request, passed as is to crossref_request
    :return: list of the values returned by crossref_request for each DOI,
             without the DOIs for which it returned None
    """
    response = session.get(get_crossref_journal_url(issn))
    response.raise_for_status()
    works = response.json()["message"]["items"]

    # Gather every DOI first; the per-article requests only start afterwards
    dois = [work["DOI"] for work in works]

    fetched = (crossref_request(doi, type="article", what=what) for doi in dois)
    return [xarticle for xarticle in fetched if xarticle is not None]

47 

48 

def _format_crossref_date_parts(date_parts):
    """
    Turn a Crossref "date-parts" entry into a date string.

    [year, month, day] gives "YYYY-MM-DD", [year, month] gives "YYYY-MM" and
    any other length gives the year alone. Month and day are zero-padded to 2 digits.
    """
    year = str(date_parts[0])
    if len(date_parts) not in (2, 3):
        return year
    return "-".join([year, *(str(part).zfill(2) for part in date_parts[1:])])


def crossref_request(doi, type="article", what=None):
    """
    Request the metadata of a DOI from Crossref and store them in an ArticleData.

    :param doi: DOI of the resource to request
    :param type: type of the expected returned value. Can be "article"
                 (currently not read by this function)
    :param what: list of metadata to request.
                 ["published", "year", "primary_url"]
                 None (the default) is handled like an empty list.
    :return: an ArticleData, or None if Crossref gives no "published" date
    :raises: whatever response.raise_for_status() raises on an HTTP error status
    """
    # A None default replaces the former mutable `what=[]` default
    what = [] if what is None else what

    url = get_crossref_doi_url(doi)
    response = session.get(url)
    response.raise_for_status()
    data = response.json()["message"]

    # FIXME : is doi used somewhere in create_articledata ?
    ptf_data = create_articledata(doi=doi)
    ptf_data.doi = doi
    ptf_data.pid = doi.replace("/", "_").replace(".", "_").replace("-", "_")

    published = data.get("published")

    # NOTE(review): the previous conditions were written `"year" and ...`, which is
    # always true for a non-empty string literal, so the year handling never depended
    # on `what`. That behaviour is kept here on purpose; if `"year" in what` was the
    # intent, confirm against the callers before changing it.
    if published is None:
        return None

    date_parts = published["date-parts"][0]
    ptf_data.year = date_parts[0]

    if "published" in what:
        ptf_data.date_published_iso_8601_date_str = _format_crossref_date_parts(date_parts)

    if (
        "primary_url" in what
        and data.get("resource") is not None
        and data["resource"].get("primary") is not None
    ):
        ptf_data.url = data["resource"]["primary"].get("URL")

    return ptf_data