Coverage for src/ptf/external/arxiv.py: 0%

1from datetime import timedelta

3import feedparser

4from requests_cache import CachedSession

5from requests_cache import FileCache

7from django.conf import settings

9from ptf import model_data

10from ptf.model_data import AbstractDict

11from ptf.model_data import create_contributor

# Module-wide HTTP session used for outgoing requests.
#
# Each configurable value is read from Django settings; when the setting is
# missing or falsy, the hard-coded default on the same line is used instead.
_cache_location = getattr(settings, "REQUESTS_CACHE_LOCATION", None) or "/tmp/ptf_requests_cache"
_cache_backend = FileCache(_cache_location, decode_content=False)

# Headers sent with every request made through the session, identifying the
# client (User-Agent) and a contact address (From).
_default_headers = {
    "User-Agent": getattr(settings, "REQUESTS_USER_AGENT", None) or "Mathdoc/1.0.0",
    "From": getattr(settings, "REQUESTS_EMAIL", None) or "accueil@listes.mathdoc.fr",
}

session = CachedSession(
    backend=_cache_backend,
    headers=_default_headers,
    # Cache entries expire after two days.
    expire_after=timedelta(days=2),
)

def get_arxiv_url(id):
    """Return the arXiv export API query URL for the given identifier.

    The value of ``id`` is interpolated as-is into the ``id_list`` query
    parameter; no escaping or validation is performed here.
    """
    endpoint = "http://export.arxiv.org/api/query"
    return f"{endpoint}?id_list={id}"

def get_arxiv_article(id):
    """Fetch an arXiv record and convert it into an article data object.

    The arXiv API is queried through the module-level ``session`` and the
    response body is parsed with ``feedparser``. Only the first entry of the
    feed is used.

    Returns:
        ``None`` when the parsed feed contains no entry; otherwise the object
        produced by ``model_data.create_articledata()`` populated with:

        * ``title_tex``: the entry title,
        * ``contributors``: one contributor per entry author, with role
          ``"author"`` and ``string_name`` set to the author's name,
        * ``abstracts``: a single abstract whose ``value_tex`` is the entry
          summary (``lang`` is always set to ``"en"``),
        * ``pdf_url``: the ``href`` of the entry link typed
          ``application/pdf`` (if several match, the last one is kept; if
          none match, the attribute is not assigned here).
    """
    # NOTE(review): "accept_encoding" is kept verbatim from the original code;
    # it looks like it was meant to be a standard HTTP header name - confirm.
    # For SSL errors, the verify=False keyword argument can be passed to get().
    response = session.get(url=get_arxiv_url(id), headers={"accept_encoding": "utf-8"})

    parsed_feed = feedparser.parse(response.text)
    if not parsed_feed.entries:
        return None
    first_entry = parsed_feed.entries[0]

    article_data = model_data.create_articledata()

    # Title
    article_data.title_tex = first_entry.title

    # Authors: every feed author becomes a contributor with the "author" role.
    for person in first_entry.authors:
        contributor = create_contributor()
        contributor["role"] = "author"
        contributor["string_name"] = person.name
        article_data.contributors.append(contributor)

    # Abstract: only the TeX value is filled from the feed summary.
    abstract: AbstractDict = {
        "tag": "abstract",
        "value_html": "",
        "value_tex": first_entry.summary,
        "value_xml": "",
        "lang": "en",
    }
    article_data.abstracts.append(abstract)

    # PDF: no early exit, so a later PDF link overrides an earlier one.
    for link in first_entry.links:
        if link["type"] != "application/pdf":
            continue
        article_data.pdf_url = link["href"]

    return article_data