Coverage for src/ptf/bibtex.py: 95%

1import re

3from pylatexenc.latex2text import LatexNodes2Text

4from pylatexenc.latexencode import unicode_to_latex

5from unidecode import unidecode

7from ptf import models

def protect_uppercase(line):
    """Wrap words containing an uppercase letter in braces.

    The line is processed as follows:

    * a trailing ``"},"`` is set aside and re-appended unchanged at the end;
    * when a ``"{"`` occurs after the first character, everything up to and
      including the first one is copied verbatim;
    * the rest is split on spaces and tabs (which are copied as they are).
      A word containing an ASCII uppercase letter is wrapped in ``{...}``,
      except for the very first word, which is never wrapped;
    * ``$...$`` spans are copied verbatim and never wrapped.

    :param line: the text to process.
    :return: the processed text.
    """
    pieces = []

    # str.endswith is used rather than comparing rfind() with len(line) - 2:
    # for a one-character line both values are -1, which wrongly reported a
    # closing brace and turned the whole line into "},".
    found_closing_brace = line.endswith("},")
    if found_closing_brace:
        line = line[:-2]

    # Copy the part up to and including the first "{" without touching it.
    i = line.find("{")
    if i > 0:
        i += 1
        pieces.append(line[:i])
    else:
        i = 0

    end = len(line)
    first_word = True

    while i < end:
        # Whitespace between words is preserved character by character.
        while i < end and line[i] in (" ", "\t"):
            pieces.append(line[i])
            i += 1

        if i >= end:
            break

        j = i
        found_uppercase = False

        if line[j] == "$":
            # Skip formulas: advance past the matching "$" (or to the end of
            # the line when the formula is not closed).
            j += 1
            while j < end and line[j] != "$":
                j += 1
            if j < end:
                j += 1
        else:
            # A word stops at whitespace or at the start of a formula.
            while j < end and line[j] not in (" ", "\t", "$"):
                if "A" <= line[j] <= "Z":
                    found_uppercase = True
                j += 1

        word = line[i:j]
        if found_uppercase and not first_word:
            pieces.append("{" + word + "}")
        else:
            pieces.append(word)

        first_word = False
        i = j

    if found_closing_brace:
        pieces.append("},")

    return "".join(pieces)

def append_in_latex(array, unicode_line, is_title=False):
    """Convert *unicode_line* to LaTeX and append the result to *array*.

    The conversion is done by ``unicode_to_latex(..., non_ascii_only=True)``.

    For titles (``is_title=True``) two extra steps are applied:

    * the inline tags ``<i>``, ``<sup>`` and ``<sub>`` are swapped for
      ``|||...|||`` placeholders before the conversion and replaced afterwards
      by ``\\protect\\emph{``, ``\\protect\\textsuperscript{`` and
      ``\\protect\\textsubscript{`` (each closing tag becomes ``}``);
    * the converted line goes through ``protect_uppercase``.

    :param array: list that receives the converted line (modified in place).
    :param unicode_line: the text to convert.
    :param is_title: whether the line is a title.
    :return: None.
    """
    if not is_title:
        array.append(unicode_to_latex(unicode_line, non_ascii_only=True))
        return

    # (tag name, LaTeX command that opens the equivalent group)
    markup = (
        ("i", "\\protect\\emph{"),
        ("sup", "\\protect\\textsuperscript{"),
        ("sub", "\\protect\\textsubscript{"),
    )

    # The tag literals must be spelled out: replacing an empty search string
    # would insert the placeholder between every character of the title.
    line = unicode_line
    for tag, _ in markup:
        line = line.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")

    line = unicode_to_latex(line, non_ascii_only=True)

    for tag, command in markup:
        line = line.replace(f"|||{tag}|||", command).replace(f"|||/{tag}|||", "}")

    array.append(protect_uppercase(line))

def get_bibtex_names(resource, role):
    """Build the BibTeX field listing the contributors that have *role*.

    Every item of ``resource.contributions.all()`` whose ``role`` attribute
    equals *role* is converted with ``str()``; the results are joined with
    ``" and "``.

    :param resource: object exposing ``contributions.all()``.
    :param role: role to select; also used as the BibTeX field name.
    :return: ``"<role> = {<names>},"``, or ``""`` when the joined names are
        empty.
    """
    joined_names = " and ".join(
        str(item) for item in resource.contributions.all() if item.role == role
    )
    if not joined_names:
        return ""
    return f"{role} = {{{joined_names}}},"

def get_bibtex_id(resource, year):
    """Build a BibTeX id: last name of the first author followed by the year.

    The first name returned by ``models.get_names(resource, "author")`` is cut
    at the first ``", "`` that is followed by another character. The part
    before it is transliterated with ``unidecode``, lower-cased and stripped
    of everything that is not an ASCII letter. The text of *year* before its
    first ``"-"`` is then appended.

    :param resource: object handed to ``models.get_names``.
    :param year: string whose part before the first ``"-"`` is used.
    :return: the id, or ``""`` when the resource has no author.
    """
    author_names = models.get_names(resource, "author")
    if not author_names:
        return ""

    # Keep only what precedes the ", X" separator of the first author.
    last_name, *_ = re.split(", .", author_names[0])
    # The last name is converted to ASCII before the non-letters are removed.
    letters_only = re.sub(r"[^a-zA-Z]", "", unidecode(last_name).lower())
    year_part = year.split("-")[0]
    return letters_only + year_part

108

109

def parse_bibtex(text, math_mode="verbatim"):
    """Parse BibTeX text into a list of dictionaries, one per entry.

    The text is split on ``@``. A chunk is kept when it starts with
    ``<doctype>{<anything>,``. Each of its lines containing ``" = "`` is then
    read as ``key = value``: the key is stripped and lower-cased, the value is
    stripped of surrounding spaces and commas and converted with
    ``LatexNodes2Text.latex_to_text``.

    :param text: the BibTeX source; may be empty or ``None``.
    :param math_mode: forwarded to ``LatexNodes2Text``.
    :return: list of dicts, each with a ``"doctype"`` key (article,
        inbook...) plus one key per parsed field. Empty when *text* is falsy
        or contains no ``@``.
    """
    entries = []
    parser = LatexNodes2Text(math_mode=math_mode)
    if not text or "@" not in text:
        return entries

    for item in re.split(r"\s*\n*@", text):
        header = re.match(r"(.*){.*,(\s*)", item)
        if not header:
            continue

        entry = {"doctype": header.group(1).strip().lower()}  # article, inbook...
        for line in item.splitlines():
            # Split on the first " = " only, so that a value which itself
            # contains " = " (e.g. an equation in a title) is kept instead of
            # the whole field being silently dropped.
            key, separator, value = line.partition(" = ")
            if not separator:
                continue
            entry[key.strip().lower()] = parser.latex_to_text(value.strip(" ,"))
        entries.append(entry)

    return entries