Coverage for src/ptf/bibtex.py: 95%

87 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1import re 

2 

3from pylatexenc.latex2text import LatexNodes2Text 

4from pylatexenc.latexencode import unicode_to_latex 

5from unidecode import unidecode 

6 

7from ptf import models 

8 

9 

def protect_uppercase(line):
    """Wrap words containing ASCII uppercase letters in braces.

    The line is scanned token by token, where a token is either a run of
    characters delimited by spaces/tabs, or a ``$...$`` formula. Every token
    that contains a character in ``A``-``Z`` is emitted as ``{token}``,
    except for:

    * the first token scanned, which is always emitted unchanged;
    * ``$...$`` formulas, which are always emitted unchanged.

    If the line contains a ``{`` that is not its first character, everything
    up to and including the first ``{`` is copied verbatim and scanning starts
    after it. A trailing ``},`` is set aside before scanning and re-appended
    at the end, so it is never wrapped together with the last word.

    Args:
        line: The text to process (e.g. ``"title = {Some Title},"``).

    Returns:
        The line with the selected tokens wrapped in braces. Whitespace is
        preserved as-is.
    """
    parts = []

    # Use endswith() rather than comparing rfind() with len(line) - 2: for a
    # one-character line both of those evaluate to -1, which wrongly reported
    # a closing "}," and replaced the whole line with "},".
    found_closing_brace = line.endswith("},")
    if found_closing_brace:
        line = line[:-2]

    # Copy the prefix up to and including the first "{" untouched. A "{" at
    # index 0 (or no "{" at all) means scanning starts from the beginning.
    i = line.find("{")
    if i > 0:
        i += 1
        parts.append(line[:i])
    else:
        i = 0

    first_word = True
    length = len(line)

    while i < length:
        # Copy whitespace through unchanged.
        while i < length and line[i] in (" ", "\t"):
            parts.append(line[i])
            i += 1

        if i >= length:
            break

        j = i
        found_uppercase = False

        if line[j] == "$":
            # Skip formulas: advance past the closing "$" (or to the end of
            # the line if the formula is not terminated).
            j += 1
            while j < length and line[j] != "$":
                j += 1
            if j < length:
                j += 1
        else:
            # A word ends at whitespace or at the start of a formula.
            while j < length and line[j] not in (" ", "\t", "$"):
                if "A" <= line[j] <= "Z":
                    found_uppercase = True
                j += 1

        token = line[i:j]
        if found_uppercase and not first_word:
            parts.append("{" + token + "}")
        else:
            parts.append(token)

        first_word = False
        i = j

    if found_closing_brace:
        parts.append("},")

    return "".join(parts)

64 

65 

def append_in_latex(array, unicode_line, is_title=False):
    """Convert a unicode line to LaTeX and append it to ``array``.

    Non-ASCII characters are converted with ``unicode_to_latex`` (called with
    ``non_ascii_only=True``).

    When ``is_title`` is true, the ``<i>``, ``<sup>`` and ``<sub>`` tags are
    additionally turned into ``\\protect\\emph{...}``,
    ``\\protect\\textsuperscript{...}`` and ``\\protect\\textsubscript{...}``,
    and the result is passed through ``protect_uppercase``.

    Args:
        array: List that receives the converted line (mutated in place).
        unicode_line: The text to convert.
        is_title: Whether the line should get the title-specific handling.
    """
    if not is_title:
        array.append(unicode_to_latex(unicode_line, non_ascii_only=True))
        return

    # (tag, temporary placeholder, LaTeX replacement), in replacement order.
    # The placeholders are swapped in before the LaTeX conversion and swapped
    # out for the LaTeX commands afterwards.
    markup = (
        ("<i>", "|||i|||", "\\protect\\emph{"),
        ("</i>", "|||/i|||", "}"),
        ("<sup>", "|||sup|||", "\\protect\\textsuperscript{"),
        ("</sup>", "|||/sup|||", "}"),
        ("<sub>", "|||sub|||", "\\protect\\textsubscript{"),
        ("</sub>", "|||/sub|||", "}"),
    )

    text = unicode_line
    for tag, placeholder, _ in markup:
        text = text.replace(tag, placeholder)

    text = unicode_to_latex(text, non_ascii_only=True)

    for _, placeholder, latex in markup:
        text = text.replace(placeholder, latex)

    array.append(protect_uppercase(text))

82 

83 

def get_bibtex_names(resource, role):
    """Build the ``role = {name and name ...},`` line for a resource.

    Args:
        resource: Object whose ``contributions.all()`` yields contributions
            exposing a ``role`` attribute and a string representation.
        role: The role to select; it is also used as the field name.

    Returns:
        The formatted line, with the string form of every matching
        contribution joined by ``" and "``; an empty string when the joined
        names are empty.
    """
    names = []
    for contribution in resource.contributions.all():
        if contribution.role == role:
            names.append(str(contribution))

    joined = " and ".join(names)
    if not joined:
        return ""
    return f"{role} = {{{joined}}},"

94 

95 

def get_bibtex_id(resource, year):
    """Return a BibTeX id made of the first author's name and the year.

    The first entry returned by ``models.get_names(resource, "author")`` is
    cut at the first ``", "`` (presumably a "Last, First" format; verify
    against ``models.get_names``), transliterated to ASCII with
    ``unidecode``, lower-cased, and stripped of every non-letter character.
    The part of ``year`` before the first ``-`` is then appended.

    Args:
        resource: The resource whose authors are looked up.
        year: A string; only the text before the first ``-`` is used.

    Returns:
        The id string, or an empty string when the resource has no authors.
    """
    authors = models.get_names(resource, "author")
    if authors:
        # Keep only what precedes the first ", " of the first author.
        leading_part = re.split(", .", authors[0])[0]
        letters_only = re.sub(r"[^a-zA-Z]", "", unidecode(leading_part).lower())
        return letters_only + year.split("-")[0]
    return ""

108 

109 

def parse_bibtex(text, math_mode="verbatim"):
    """Parse BibTeX text into a list of dictionaries, one per entry.

    The text is split on ``@``. Each chunk whose start looks like
    ``doctype{...,`` produces a dict with a ``"doctype"`` key (lower-cased,
    e.g. ``article``, ``inbook``). Every line of the chunk containing the
    separator ``" = "`` then adds a field: the lower-cased key maps to the
    value converted from LaTeX to text with ``LatexNodes2Text``, after
    stripping surrounding spaces and commas.

    Parsing is line-based: a field is only recognised when its key and value
    are on the same line.

    Args:
        text: The BibTeX source; may be empty or ``None``.
        math_mode: Passed to ``LatexNodes2Text`` as ``math_mode``.

    Returns:
        A list of entry dicts; empty when ``text`` is falsy or has no ``@``.
    """
    entries = []
    parser = LatexNodes2Text(math_mode=math_mode)

    if not text or "@" not in text:
        return entries

    for item in re.split(r"\s*\n*@", text):
        header = re.match(r"(.*){.*,(\s*)", item)
        if not header:
            continue

        entry = {"doctype": header.group(1).strip().lower()}  # article, inbook...
        for line in item.splitlines():
            # Split on the FIRST " = " only, so that a value which itself
            # contains " = " (e.g. "title = {E = mc^2},") is kept instead of
            # being silently dropped.
            key, separator, value = line.partition(" = ")
            if not separator:
                continue
            entry[key.strip().lower()] = parser.latex_to_text(value.strip(" ,"))

        entries.append(entry)

    return entries