Coverage for src/ptf/bibtex.py: 95%
87 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1import re
3from pylatexenc.latex2text import LatexNodes2Text
4from pylatexenc.latexencode import unicode_to_latex
5from unidecode import unidecode
7from ptf import models
def protect_uppercase(line):
    """Wrap words containing an uppercase letter in braces.

    The text is scanned word by word (words are separated by spaces or tabs):

    * everything up to and including the first ``{`` is copied unchanged,
      provided that brace is not the very first character of the line;
    * a trailing ``},`` is set aside before scanning and put back at the end;
    * ``$...$`` spans (formulas) are copied verbatim;
    * the first word scanned is never wrapped;
    * any other word containing an ASCII uppercase letter (``A``-``Z``) is
      emitted as ``{word}``.

    Args:
        line: the text to process, e.g. ``"title = {Some Title},"``.

    Returns:
        The processed text, with whitespace preserved as in the input.
    """
    closing = "},"
    # Use endswith() instead of comparing rfind() with len(line) - 2: for a
    # one-character line both values are -1, which was mistaken for a match
    # and turned e.g. "a" into "},".
    has_closing_brace = line.endswith(closing)
    if has_closing_brace:
        line = line[: -len(closing)]

    parts = []
    brace = line.find("{")
    if brace > 0:
        # Keep the prefix (up to and including the opening brace) untouched.
        pos = brace + 1
        parts.append(line[:pos])
    else:
        # No brace, or the brace is the first character: scan the whole line.
        pos = 0

    end = len(line)
    first_word = True
    while pos < end:
        # Copy the run of separators as is.
        start = pos
        while pos < end and line[pos] in (" ", "\t"):
            pos += 1
        parts.append(line[start:pos])
        if pos >= end:
            break

        start = pos
        has_upper = False
        if line[pos] == "$":
            # Skip formulas: jump past the closing "$" when there is one.
            pos += 1
            while pos < end and line[pos] != "$":
                pos += 1
            if pos < end:
                pos += 1
        else:
            # A word stops at a separator or at the start of a formula.
            while pos < end and line[pos] not in (" ", "\t", "$"):
                if "A" <= line[pos] <= "Z":
                    has_upper = True
                pos += 1

        token = line[start:pos]
        if has_upper and not first_word:
            parts.append("{" + token + "}")
        else:
            parts.append(token)
        first_word = False

    if has_closing_brace:
        parts.append(closing)
    return "".join(parts)
def append_in_latex(array, unicode_line, is_title=False):
    """Convert ``unicode_line`` to LaTeX and append the result to ``array``.

    Args:
        array: list that receives the converted line (modified in place).
        unicode_line: the text to convert.
        is_title: when true, ``<i>``, ``<sup>`` and ``<sub>`` tags are turned
            into ``\\protect``-ed LaTeX commands and the converted text is
            passed through ``protect_uppercase``.
    """
    if not is_title:
        array.append(unicode_to_latex(unicode_line, non_ascii_only=True))
        return

    # Tags are swapped for placeholders before the conversion and replaced by
    # LaTeX commands afterwards -- presumably so that unicode_to_latex never
    # sees (and cannot alter) the markup itself.
    tag_to_placeholder = (
        ("<i>", "|||i|||"),
        ("</i>", "|||/i|||"),
        ("<sup>", "|||sup|||"),
        ("</sup>", "|||/sup|||"),
        ("<sub>", "|||sub|||"),
        ("</sub>", "|||/sub|||"),
    )
    placeholder_to_latex = (
        ("|||i|||", "\\protect\\emph{"),
        ("|||/i|||", "}"),
        ("|||sup|||", "\\protect\\textsuperscript{"),
        ("|||/sup|||", "}"),
        ("|||sub|||", "\\protect\\textsubscript{"),
        ("|||/sub|||", "}"),
    )

    text = unicode_line
    for tag, placeholder in tag_to_placeholder:
        text = text.replace(tag, placeholder)

    text = unicode_to_latex(text, non_ascii_only=True)

    for placeholder, command in placeholder_to_latex:
        text = text.replace(placeholder, command)

    array.append(protect_uppercase(text))
def get_bibtex_names(resource, role):
    """Build the BibTeX field listing the contributors of ``resource`` with ``role``.

    Args:
        resource: object whose ``contributions.all()`` yields contributions
            exposing a ``role`` attribute and a string representation.
        role: the role to keep; it is also used as the BibTeX field name.

    Returns:
        ``"<role> = {<name> and <name> ...},"``, or an empty string when the
        joined names are empty.
    """
    names = []
    for contribution in resource.contributions.all():
        if contribution.role == role:
            names.append(str(contribution))

    joined = " and ".join(names)
    if not joined:
        return ""
    return f"{role} = {{{joined}}},"
def get_bibtex_id(resource, year):
    """Return a BibTeX id: last name of the first author followed by the year.

    The name is transliterated to ASCII with ``unidecode``, lower-cased and
    stripped of every non-letter character. Only the part of ``year`` before
    the first ``-`` is used.

    Args:
        resource: object handed to ``models.get_names`` to fetch its authors.
        year: string such as ``"2020"`` or ``"2020-2021"``.

    Returns:
        The id, or an empty string when the resource has no author.
    """
    authors = models.get_names(resource, "author")
    if not authors:
        return ""

    # Keep what precedes the first ", " followed by another character --
    # presumably names come as "Last, First"; verify against models.get_names.
    last_name = re.split(", .", authors[0])[0]
    ascii_name = unidecode(last_name).lower()
    letters_only = re.sub(r"[^a-zA-Z]", "", ascii_name)

    first_year = year.split("-")[0]
    return letters_only + first_year
def parse_bibtex(text, math_mode="verbatim"):
    """Parse BibTeX source into a list of dictionaries, one per entry.

    The text is split on ``@``. A chunk counts as an entry when it starts
    with ``<doctype>{<something>,``. Inside an entry, every line containing
    ``" = "`` is read as ``key = value``: the key is stripped and lower-cased,
    the value is stripped of surrounding spaces and commas and converted from
    LaTeX to plain text. Fields are read line by line, so a value spanning
    several lines is not reassembled.

    Args:
        text: the BibTeX source; ``None``, an empty string or a text without
            ``@`` gives an empty list.
        math_mode: forwarded to ``LatexNodes2Text``.

    Returns:
        A list of dicts. Each has a ``"doctype"`` key (article, inbook, ...)
        plus one key per field found.
    """
    entries = []
    parser = LatexNodes2Text(math_mode=math_mode)
    if not text or "@" not in text:
        return entries

    for item in re.split(r"\s*\n*@", text):
        header = re.match(r"(.*){.*,(\s*)", item)
        if not header:
            continue

        entry = {"doctype": header.group(1).strip().lower()}  # article, inbook...
        for line in item.splitlines():
            # Split on the first " = " only, so a value that itself contains
            # " = " (e.g. "title = {E = mc^2},") is kept instead of dropped.
            key, separator, value = line.partition(" = ")
            if not separator:
                continue
            entry[key.strip().lower()] = parser.latex_to_text(value.strip(" ,"))
        entries.append(entry)

    return entries