Coverage for src/ptf/tex.py: 4%

546 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1import os 

2import subprocess 

3import tempfile 

4 

5import pypdf 

6from pylatexenc.latexencode import unicode_to_latex 

7 

8from django.conf import settings 

9 

10from ptf import model_helpers 

11from ptf import models 

12from ptf import utils 

13from ptf.cmds.xml import xml_utils 

14from ptf.cmds.xml.jats.jats_parser import get_tex_from_xml 

15from ptf.display import resolver 

16 

17 

def get_tex_keyword_date_published(colid, article, lang=""):
    r"""
    Return the TeX command used to write the publication date of an article.

    - \CDRsetmeta{traduction_date_posted} when lang is not empty
    - \dateposted for AHL, AIF, OGEO and JTNB
    - \dateposted for the CR* collections when the article has no online first date
    - \datepublished otherwise
    """
    date_posted = "\\dateposted"
    comptes_rendus = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")

    if colid in ("AHL", "AIF", "OGEO", "JTNB"):
        keyword = date_posted
    elif colid in comptes_rendus and not article.date_online_first:
        keyword = date_posted
    else:
        keyword = "\\datepublished"

    # A translation always uses its own metadata command
    return "\\CDRsetmeta{traduction_date_posted}" if lang != "" else keyword

29 

30 

def get_tex_keyword_date_online_first(colid, article, lang=""):
    r"""
    Return the TeX command used to write the online first date of an article.

    The command is \dateposted whatever the collection, the article or the lang.
    """
    keyword = "\\dateposted"
    return keyword

33 

34 

def read_tex_file(filename):
    """
    Read a tex file and return its content as a list of lines.

    The file is decoded as utf-8 first, then as iso-8859-1 if the utf-8 decoding fails.
    An empty list is returned if filename is not an existing file.
    """
    if not os.path.isfile(filename):
        return []

    try:
        with open(filename, encoding="utf-8") as tex_file:
            return tex_file.readlines()
    except UnicodeDecodeError:
        # Not utf-8: fall back to iso-8859-1 below
        pass

    with open(filename, encoding="iso-8859-1") as tex_file:
        return tex_file.readlines()

52 

53 

def convert_file_to_utf8(article_path, from_name, to_name):
    """
    Copy a file of the article folder to mathdoc-tex under a new name, encoded in utf-8.

    <article_path>/<from_name> is read with read_tex_file (utf-8 or iso-8859-1),
    written in utf-8 to a temporary file of LOG_DIR/tmp/, then sent with scp
    to <article_path>/<to_name> on mathdoc-tex.
    The temporary file is not removed.
    """
    source_lines = read_tex_file(os.path.join(article_path, from_name))

    tex_user = settings.MERSENNE_TEX_USER
    tmp_prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_prefix)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_prefix, delete=False
    ) as tmp_file:
        local_copy = tmp_file.name  # ex: /tmp/Rxsft
        tmp_file.write("".join(source_lines))

    # copy to mersenne-tex
    remote_target = os.path.join(article_path, to_name)
    utils.execute_cmd(f"scp {local_copy} {tex_user}@mathdoc-tex:{remote_target}")

71 

72 

def write_tex_file(filename, lines, create_temp_file=False):
    """
    Write the lines (joined without separator) to a file encoded in utf-8.

    With create_temp_file, filename is ignored: the content is written to a new
    temporary file of LOG_DIR/tmp/ (which is not removed afterwards).

    Returns the path of the written file.
    """
    if not create_temp_file:
        with open(filename, "w", encoding="utf-8") as tex_file:
            tex_file.write("".join(lines))
        return filename

    tmp_prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_prefix)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_prefix, delete=False
    ) as tmp_file:
        tmp_file.write("".join(lines))
        return tmp_file.name  # ex: /tmp/Rxsft

88 

89 

def insert_date_published(new_lines, article, colid, begin_document_pos, lang=""):
    r"""
    Insert the publication date command(s) of the article in new_lines.

    Nothing is inserted when the article has no date_published.
    Otherwise the command given by get_tex_keyword_date_published is inserted
    at begin_document_pos. For a finalized thematic issue (container with a title
    and without online first) where the article has no online first date,
    \datepublished is inserted as well if it is not the command already used.

    new_lines is modified in place.
    Returns begin_document_pos shifted by the number of inserted lines.
    """
    if article.date_published is None:
        # The thematic issue case below also needs the publication date
        return begin_document_pos

    date_str = article.date_published.strftime("%Y-%m-%d")

    keyword = get_tex_keyword_date_published(colid, article, lang)
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")
    begin_document_pos += 1

    container = article.my_container
    if container is not None:
        is_thematic_issue = len(container.title_html) > 0
        is_issue_finalized = not container.with_online_first
        is_thematic_finalized = is_thematic_issue and is_issue_finalized

        if is_thematic_finalized and article.date_online_first is None:
            # Finalized thematic issue where the article did not go through online first
            # => Add \datepublished so that "Issue date :" appears in the PDF
            keyword2 = "\\datepublished"
            if keyword2 != keyword:
                new_lines.insert(begin_document_pos, f"{keyword2}{{{date_str}}}\n")
                begin_document_pos += 1

    return begin_document_pos

112 

113 

def insert_date_online_first(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the online first date command of the article in new_lines.

    Nothing is inserted when the article has no date_online_first.
    new_lines is modified in place.
    Returns begin_document_pos shifted by the number of inserted lines (0 or 1).
    """
    if article.date_online_first is None:
        return begin_document_pos

    keyword = get_tex_keyword_date_online_first(colid, article, lang)
    date_line = f'{keyword}{{{article.date_online_first.strftime("%Y-%m-%d")}}}\n'
    new_lines.insert(begin_document_pos, date_line)
    return begin_document_pos + 1

122 

123 

def insert_end_page(new_lines, article, colid, begin_document_pos):
    r"""
    Insert the command that defines \cdr@end@page with the last page of the article.

    The command is inserted one line after begin_document_pos, only if the article
    has a last page (lpage). new_lines is modified in place.
    Returns begin_document_pos, unchanged.
    """
    last_page = article.lpage
    if last_page:
        end_page_cmd = "\\makeatletter\\def\\cdr@end@page{" + last_page + "}\\makeatother\n"
        new_lines.insert(begin_document_pos + 1, end_page_cmd)

    return begin_document_pos

130 

131 

def replace_dates_in_tex(lines, article, colid, replace_frontpage_only=False, lang=""):
    r"""
    Add or replace \dateposted and \datepublished in the source TeX.

    lines is the list of lines of the source TeX; it is not modified.
    Lines that are empty or whose first non-blank character is % (comments)
    are copied unchanged. On the other lines:

    - with replace_frontpage_only, the existing date commands are dropped
      (the dates are inserted again before \begin{document})
    - otherwise an existing date command is replaced in place
    - "published" is removed from the \documentclass line to allow compilation
    - a \makeatletter\def\cdr@end@page line is dropped
    - with replace_frontpage_only, \bibliography{xxx} becomes \bibliography{xxx_FP}

    The dates that were not found in the source are inserted just before
    \begin{document}. With replace_frontpage_only, the end page command is
    inserted just after \begin{document}.

    Returns (new_lines, bib_name); bib_name is "" if no \bibliography was renamed.
    """
    new_lines = []
    bib_name = ""

    keyword_date_published = get_tex_keyword_date_published(colid, article, lang)
    keyword_date_online_first = get_tex_keyword_date_online_first(colid, article, lang)
    found_date_online_first = False
    found_date_published = False
    begin_document_pos = -1

    existing_dates = (
        "\\datepublished{",
        "\\dateposted{",
        "\\CDRsetmeta{traduction_date_posted}{",
    )

    for line in lines:
        # j: position of the first character that is neither a space nor a tab
        j = len(line) - len(line.lstrip(" \t"))

        if j >= len(line) or line[j] == "%":
            # Empty line or comment: keep it as it is
            new_lines.append(line)

        elif replace_frontpage_only and line.startswith(existing_dates, j):
            # Dropped: the dates are inserted again before \begin{document}
            pass

        elif line.startswith(f"{keyword_date_published}{{", j):
            # replace existing \datepublished
            found_date_published = True
            insert_date_published(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith(f"{keyword_date_online_first}{{", j):
            # replace existing \dateposted
            found_date_online_first = True
            insert_date_online_first(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith("\\begin{document", j):
            # \begin{document}: the dates are added here if not present.
            # The position is taken in new_lines (not in lines): the 2 lists do not
            # have the same length as soon as a line has been dropped or expanded.
            begin_document_pos = len(new_lines)
            new_lines.append(line)

        elif line.startswith(("\\documentclass", "{\\documentclass"), j):
            # remove published from \documentclass to allow compilation
            line = (
                line.replace(",published,", ",")
                .replace(",published", "")
                .replace("published", "")
            )
            new_lines.append(line)

        elif line.startswith("\\makeatletter\\def\\cdr@end@page", j):
            # Command to specify the last page (present in the front page)
            # Move it after \begin{document}
            pass

        elif (
            replace_frontpage_only
            and line.startswith("\\bibliography", j)
            and not line.startswith("\\bibliographystyle", j)
        ):
            end = line.find("}")
            if end > 0:
                # 14 is the length of "\bibliography{"
                bib_name = line[j + 14 : end]
                new_lines.append("\\bibliography{" + bib_name + "_FP}\n")
            else:
                # No closing brace on this line: keep the line rather than losing it
                new_lines.append(line)

        else:
            new_lines.append(line)

    if begin_document_pos > 0 and not found_date_online_first:
        begin_document_pos = insert_date_online_first(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if begin_document_pos > 0 and not found_date_published:
        begin_document_pos = insert_date_published(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if replace_frontpage_only and begin_document_pos > 0:
        begin_document_pos = insert_end_page(new_lines, article, colid, begin_document_pos)

    return new_lines, bib_name

229 

230 

def protect_tex(lines, keyword="published"):
    r"""
    Add keyword to the options of the \documentclass of a cedram TeX source.

    lines is the list of lines of the source TeX; a new list is returned.
    The keyword is added before the "]" that closes the options, whether it is
    on the \documentclass line or on a following line. Nothing is added when
    the line with the "]" does not contain {cedram (ex: {article}).
    Comment lines are ignored.
    """
    protected_lines = []
    options_open = False  # True between a \documentclass without "]" and the "]"

    for line in lines:
        # first_char: position of the first character that is neither a space nor a tab
        first_char = len(line) - len(line.lstrip(" \t"))
        is_comment_or_empty = first_char >= len(line) or line[first_char] == "%"
        is_cedram = line.find("{cedram") > 0  # Ignore {article}
        bracket = line.find("]")

        if is_comment_or_empty:
            pass
        elif line.startswith(("\\documentclass", "{\\documentclass"), first_char):
            # add published to \documentclass after compilation
            if bracket > 0:
                if is_cedram:
                    line = line[:bracket] + "," + keyword + line[bracket:]
            else:
                options_open = True
        elif options_open:
            if bracket == first_char:
                # The line starts with "]": the keyword gets its own line before it
                if is_cedram:
                    protected_lines.append(f",{keyword}\n")
                options_open = False
            elif bracket > -1:
                if is_cedram:
                    line = line[:bracket] + "," + keyword + line[bracket:]
                options_open = False

        protected_lines.append(line)

    return protected_lines

268 

269 

def get_tex_corresponding_emails(author_contributions):
    r"""
    Return the emails of the corresponding authors, converted to LaTeX.

    Only the contributions flagged as corresponding and with an email are kept.
    The \_ produced by the LaTeX conversion are turned back into _.
    """
    return [
        unicode_to_latex(contribution.email).replace(r"\_", r"_")
        for contribution in author_contributions
        if contribution.corresponding and contribution.email
    ]

278 

279 

def get_tex_authors(author_contributions):
    r"""
    Return the TeX lines that describe the authors.

    For each contribution:
        \author{\firstname{Antoine} \lastname{Lavoisier}}
        \address{Rue sans aplomb, Paris, France}      (one line per address)
        \email{a-lavois@lead-free-univ.edu}           (corresponding authors only)
    followed by an empty line.
    \CDRorcid{...}, \IsEqualContrib and \dead are added inside \author{} when relevant.
    """
    tex_lines = []

    for contribution in author_contributions:
        first_name = unicode_to_latex(contribution.first_name)
        last_name = unicode_to_latex(contribution.last_name)

        author_parts = [f"\\author{{\\firstname{{{first_name}}} \\lastname{{{last_name}}}"]
        if contribution.orcid:
            author_parts.append(f"\\CDRorcid{{{contribution.orcid}}}")
        if contribution.equal_contrib:
            author_parts.append("\\IsEqualContrib")
        if contribution.deceased_before_publication:
            author_parts.append("\\dead")
        author_parts.append("}\n")
        tex_lines.append("".join(author_parts))

        tex_lines.extend(
            f"\\address{{{unicode_to_latex(contribaddress.address)}}}\n"
            for contribaddress in contribution.contribaddress_set.all()
        )

        if contribution.corresponding and len(contribution.email) > 0:
            tex_lines.append(f"\\email{{{unicode_to_latex(contribution.email)}}}\n")

        tex_lines.append("\n")

    return tex_lines

311 

312 

def create_tex_for_pcj(article):
    r"""
    Create the TeX source of a PCJ article.

    The source uses the cedram class with the PCJ option. It declares the metadata
    of the article (issue, article number, DOI, RDOI, PCI section, article type,
    conference, corresponding emails, title, authors, abstract, publication date)
    and includes the PDF article_<pid>.pdf with \PCIincludepdf.

    The date is written as AAAA-MM-DD when the article has no date_published.

    Returns the list of lines of the TeX source.
    """
    pci = article.get_pci_section()

    extid = model_helpers.get_extid(article, "rdoi")
    rdoi = extid.id_value if extid is not None else ""

    container = article.my_container
    lines = [
        "\\documentclass[PCJ,Unicode,screen,Recup]{cedram}\n",
        "\\usepackage{pax}\n",
        "\\usepackage{mathrsfs}\n\n",
        "\\issueinfo{" + container.volume + "}{}{}{" + container.year + "}\n",
        f"\\renewcommand*{{\\thearticle}}{{{article.article_number}}}\n",
        f"\\DOI{{{article.doi}}}\n",
        f"\\RDOI{{{rdoi}}}\n",
        f"\\setPCI{{{pci}}}\n",
        # Ends with \n like the other lines, so that the next command starts a new line
        f"\\CDRsetmeta{{articletype}}{{{article.atype}}}\n",
    ]

    conf = article.get_conference()
    if len(conf) > 0:
        lines.append(f"\\setPCIconf{{{conf}}}\n")

    author_contributions = article.get_author_contributions()

    lines.extend(
        f"\\PCIcorresp{{{email}}}\n"
        for email in get_tex_corresponding_emails(author_contributions)
    )
    lines.append("\n")

    # \title[Sample for the template]{Sample for the template, with quite a very long title}
    # The HTML tags are replaced by placeholders before the LaTeX conversion,
    # then the placeholders are replaced by the TeX commands.
    title = article.title_tex
    for tag in ("i", "sup", "sub"):
        title = title.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")
    title = unicode_to_latex(title)
    for tag, command in (
        ("i", "\\emph"),
        ("sup", "\\textsuperscript"),
        ("sub", "\\textsubscript"),
    ):
        title = title.replace(f"|||{tag}|||", f"\\protect{command}{{")
        title = title.replace(f"|||/{tag}|||", "}")
    lines.append(f"\\title{{{title}}}\n")
    lines.append("\n")
    lines.extend(get_tex_authors(author_contributions))

    # No keywords for PCJ

    abstracts = article.get_abstracts()
    if len(abstracts) > 0:
        abstract = abstracts.first()
        value = get_tex_from_xml(abstract.value_xml, "abstract", for_tex_file=True)

        lines.append("\\begin{abstract}\n")
        lines.append(value + "\n")
        lines.append("\\end{abstract}\n")

    date_ = article.date_published.strftime("%Y-%m-%d") if article.date_published else "AAAA-MM-DD"
    keyword = get_tex_keyword_date_published("PCJ", article)
    lines.append(f"{keyword}{{{date_}}}\n")

    lines.append("\\begin{document}\n")
    lines.append("\\maketitle\n")
    article_pdf = f"article_{article.pid}.pdf"
    lines.append(f"\\PCIincludepdf{{{article_pdf}}}\n")

    lines.append("\\end{document}\n")

    return lines

431 

432 

def compile_tex(lines, article, update=False):
    """
    1) Create a tex file from the list of lines
    2) Upload the file to mathdoc-tex (+ the pdf for PCJ)
    3) Compile the file
    4) Replace the pdf in /mersenne_test_data
    5) linearize the pdf
    TODO: merge ptf_tools/views create_frontpage (not done while PCJ is unstable to avoid compilation bugs in prod)

    lines: list of lines of the TeX source
    article: the article to compile; its container and collection give the TeX folder
    update: if True (PCJ), the remote article folder is not created and the PDF is not uploaded

    Returns the path of the PDF in MERSENNE_TEST_DATA_FOLDER,
    or None if the site is not ptf_tools.
    Raises an Exception if the article of a collection other than PCJ has no ojs-id.
    """

    # Only allowed on ptf-tools
    if settings.SITE_NAME != "ptf_tools":
        return

    user = settings.MERSENNE_TEX_USER
    issue = article.my_container
    colid = issue.my_collection.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue.pid)
    # Stays empty for the collections other than PCJ
    article_pdf = ""

    if colid != "PCJ":
        # The TeX folder and the TeX file are named after the ojs-id of the article
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)
    else:
        # PCJ: the TeX folder and the TeX file are named after the pid of the article
        article_tex_name = article.pid
        article_path = os.path.join(issue_path, article_tex_name)
        article_pdf = f"article_{article.pid}.pdf"

        if not update:
            # Create the article folder
            cmd = f"ssh {user}@mathdoc-tex mkdir -p {article_path}"
            utils.execute_cmd(cmd)

            # copy the pdf to mersenne-tex
            relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
            folder = os.path.join(settings.RESOURCES_ROOT, relative_folder)
            pdf_file_name = os.path.join(folder, article.pid + ".pdf")

            cmd = f"scp {pdf_file_name} {user}@mathdoc-tex:{article_path}/{article_pdf}"
            utils.execute_cmd(cmd)

    article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
    # The filename argument is ignored: the lines are written to a temporary file
    fpath = write_tex_file("", lines, create_temp_file=True)

    # copy to mersenne-tex
    cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"
    utils.execute_cmd(cmd)
    # os.unlink(f.name)

    # recompile article
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    # execute script to compile: the local script is sent to the remote bash on stdin
    cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name} {colid} {article_pdf}"
    utils.execute_cmd(cmd)

    # replace pdf
    cedram_pdf_location = os.path.join(article_path, article_tex_name + ".pdf")
    relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
    to_path = os.path.join(
        settings.MERSENNE_TEST_DATA_FOLDER, relative_folder, article.pid + ".pdf"
    )
    if settings.MERSENNE_CREATE_FRONTPAGE:
        utils.linearize_pdf(cedram_pdf_location, to_path)

    # to_path is returned even if MERSENNE_CREATE_FRONTPAGE is off (pdf not replaced)
    return to_path

500 

501 

def add_outline(reader, writer, outlines, parent=None):
    """
    Copy an outline (Table of Contents of a PDF) from a pypdf reader to a pypdf writer.

    reader: the PdfReader the outline comes from
    writer: the PdfWriter that receives the outline items
    outlines: list of outline items; a nested list holds the children of
              the item that precedes it
    parent: the outline item (of the writer) that receives the items of outlines

    The items of type /XYZ keep their position (left, top) with a zoom of 1.
    The other items use the default fit of the writer.
    """
    child_parent = parent
    for item in outlines:
        if isinstance(item, list):
            # Children of the last item that was added
            add_outline(reader, writer, item, child_parent)
            continue

        title = item["/Title"]
        page_num = reader.get_destination_page_number(item)

        # Keyword arguments: the positional order of add_outline_item is
        # (title, page_number, parent, before, color, bold, italic, fit, ...),
        # so positional values after parent are easily bound to the wrong parameter.
        options = {"parent": parent, "bold": False, "italic": False}
        if item["/Type"] == "/XYZ":
            options["fit"] = pypdf.generic.Fit("/XYZ", (item["/Left"], item["/Top"], 1))

        child_parent = writer.add_outline_item(title, page_num, **options)

523 

524 

def test():
    """
    Manual check of the merge of a front page with the content of an article.

    Developer code that works on hard-coded local files:
    the 1st page of test_FP.pdf is merged with the pages of test_content.pdf
    (except its 1st page) into test_merged.pdf, with the outline and the named
    destinations of test_content.pdf. The metadata are then added with the values
    of the first Article of the database, and the interpreter exits.
    """
    local_fp_pdf = "/home/touvierj/Bureau/test_FP.pdf"
    local_content_pdf = "/home/touvierj/Bureau/test_content.pdf"
    merged_pdf = "/home/touvierj/Bureau/test_merged.pdf"

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: only the 1st page of the front page PDF
    if len(pdf_reader_fp.pages) > 0:
        pdf_writer.add_page(pdf_reader_fp.pages[0])

    # Content: all the pages of the content PDF, except the 1st one
    for page_index in range(1, len(pdf_reader_content.pages)):
        pdf_writer.add_page(pdf_reader_content.pages[page_index])

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Add metadata to the PDF, including EXIF data
    add_metadata(models.Article.objects.first(), local_content_pdf, merged_pdf)

    # The function always ends by leaving the interpreter
    exit()

581 

582 

def add_metadata(article, in_pdf, out_pdf):
    """
    Add metadata to a PDF with exiftool.

    The tags of in_pdf are copied to out_pdf (-tagsFromFile). The Title and Author
    found in the document information of in_pdf are set again explicitly, and the
    values of the article are added: language, publisher, DOI, keywords, copyright,
    volume, number, ISSN, e-ISSN and issue name.

    article: the article that gives the values of the metadata
    in_pdf: path of the PDF the tags are copied from
    out_pdf: path of the PDF to modify (modified in place if equal to in_pdf)

    exiftool is called with a list of arguments and without a shell: the values
    can contain quotes, spaces or any character special to a shell.

    Returns the output of exiftool (bytes).
    Raises subprocess.CalledProcessError if exiftool returns an error, and
    FileNotFoundError if the exiftool executable is not found.
    """
    reader = pypdf.PdfReader(in_pdf, strict=False)

    # metadata is None when the PDF has no document information
    metadata = reader.metadata or {}

    args = ["exiftool", "-tagsFromFile", in_pdf]
    if in_pdf == out_pdf:
        args.append("-overwrite_original_in_place")

    container = article.my_container
    collection = article.get_collection()

    _, kwds, _ = article.get_kwds_by_type()
    keywords = ", ".join(str(kwd.value) for kwd in kwds)

    lang = {"fr": "fr-FR", "en": "en-GB"}.get(article.lang, "")

    for pdf_key, tag in (("/Title", "Title"), ("/Author", "Author")):
        value = metadata.get(pdf_key)
        if value is not None:
            args.append(f"-{tag}={value}")

    args.append("-Creator=Centre Mersenne")
    args.append("-Subject=")  # empty value
    if lang:
        # GROUP:TAG syntax, like the other XMP tags below
        args.append(f"-xmp-dc:Language={lang}")
    args.append(f"-xmp-dc:publisher={container.my_publisher.pub_name}")
    args.append(f"-xmp-prism:DOI={article.doi}")
    args.append(f"-Keywords={keywords}")
    args.append(f"-xmp-xmp:Keywords={keywords}")
    args.append(f"-xmp-pdf:Keywords={keywords}")
    args.append("-xmp-pdf:Copyright=© The author(s)")

    # Optional tags: only added when the value is not empty
    optional_tags = (
        ("xmp-prism:Volume", container.volume),
        ("xmp-prism:Number", container.number),
        ("xmp-prism:ISSN", collection.issn),
        ("xmp-prism:EISSN", collection.e_issn),
        ("xmp-prism:IssueName", container.title_tex),
    )
    args.extend(f"-{tag}={value}" for tag, value in optional_tags if value)

    args.append(out_pdf)

    return subprocess.check_output(args)

643 

644 

def replace_front_page(
    article, article_tex_name, fp_pdf_file_name, content_pdf_file_name, final_pdf_file_name
):
    """
    Replace the front page (1st page) of the PDF of an article.

    article: the article; gives the metadata and the first page number (fpage)
    article_tex_name: base name used for the temporary files of LOG_DIR/tmp/
    fp_pdf_file_name: PDF on mathdoc-tex whose 1st page is the new front page
    content_pdf_file_name: local PDF that gives all the other pages
    final_pdf_file_name: path on mathdoc-tex where the resulting PDF is copied

    The merged PDF gets the outline and the named destinations of the content PDF,
    is processed by bin/update_pdf.sh, then receives the metadata (add_metadata).
    If the article has a first page, the pages are relabelled to start at that
    number (roman numerals if fpage is not an integer). Otherwise the merged PDF
    is copied as it is.
    """
    # At the point the PDF has been recompiled, possibly with a new template
    # Use the 1st page of the new PDF with the other pages of the .pdf_SAV

    user = settings.MERSENNE_TEX_USER
    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")

    # Copy the PDF files locally (pypdf is installed in ptf-tools)
    local_fp_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_FP")
    utils.execute_cmd(f"scp {user}@mathdoc-tex:{fp_pdf_file_name} {local_fp_pdf}")

    local_content_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_content")
    utils.execute_cmd(f"cp {content_pdf_file_name} {local_content_pdf}")

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: only the 1st page of the recompiled PDF
    if len(pdf_reader_fp.pages) > 0:
        pdf_writer.add_page(pdf_reader_fp.pages[0])

    # Content: all the pages of the content PDF, except the 1st one
    for page_index in range(1, len(pdf_reader_content.pages)):
        pdf_writer.add_page(pdf_reader_content.pages[page_index])

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    merged_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_merged")
    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Compiled PDF are sometimes buggy (wrong xref table). Use pdftk to fix the file.
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(f"{ptf_tools_bin}/update_pdf.sh {local_content_pdf} {merged_pdf}")

    # Add metadata to the PDF, including EXIF data
    add_metadata(article, local_content_pdf, merged_pdf)

    # Without first page number there is nothing to relabel: the merged PDF is the result
    pdf_to_upload = merged_pdf

    # pypdf creates a PDF that starts on page 1, fix it
    if article.fpage:
        try:
            first_page = int(article.fpage)
            label_style = "/D"
        except ValueError:
            first_page = xml_utils.roman_to_int(article.fpage)
            label_style = "/r"

        # NOTE(review): only the pages are added to this second writer; confirm that
        # the outline and the metadata written above are still present in the result.
        reader = pypdf.PdfReader(merged_pdf)
        writer = pypdf.PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        writer.set_page_label(
            page_index_from=0,
            page_index_to=len(reader.pages) - 1,
            start=first_page,
            style=label_style,
        )

        local_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf")
        writer.write(local_pdf)
        writer.close()
        pdf_to_upload = local_pdf

    # copy to mersenne-tex
    utils.execute_cmd(f"scp {pdf_to_upload} {user}@mathdoc-tex:{final_pdf_file_name}")

727 

728 

def compile_article(
    article,
    colid,
    issue_id,
    article_path,
    article_tex_name,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Recompile the TeX source of an article on mathdoc-tex with its dates,
    then put the resulting PDF in MERSENNE_TEST_DATA_FOLDER.

    article: the article to compile
    colid: pid of the collection (selects the date commands)
    issue_id: not used in this function
    article_path: folder of the TeX source of the article
    article_tex_name: base name of the tex/cfg/pdf files ("-<lang>" is added for a translation)
    replace_frontpage_only: work on "_FP" copies of the files and only replace the
        1st page of the production PDF (see replace_front_page)
    skip_compilation: do not modify nor compile the TeX source
    lang: language of the translation to compile ("" for the article itself)
    """
    user = settings.MERSENNE_TEX_USER

    if lang != "":
        article_tex_name += "-" + lang

    article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
    # Regular compilation: compiled_pdf and final_pdf are the same
    # recompilation of the front page: compiled_pdf is the entire pdf with the new front page
    #                                  final_pdf is the pdf after the merge (new front page; old content)
    compiled_pdf_file_name = final_pdf_file_name = os.path.join(
        article_path, article_tex_name + ".pdf"
    )
    sav_pdf_file_name = compiled_pdf_file_name + "_SAV"

    # Save the pdf file
    cmd = f"ssh {user}@mathdoc-tex cp {compiled_pdf_file_name} {sav_pdf_file_name}"
    utils.execute_cmd(cmd)

    # Save the cfg file (no cfg for translations)
    if lang == "":
        cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name}_SAV"
        utils.execute_cmd(cmd)

    # Create a temporary file. Careful: on ptf-tools, apache does not seem to be
    # allowed to write in /tmp, hence the use of LOG_DIR/tmp/.
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    if replace_frontpage_only and skip_compilation:
        # We want to update the front page without compiling the tex:
        # We copy the original <article_tex_name>.PDF to <article_tex_name>_FP.PDF

        article_tex_name2 = article_tex_name + "_FP"
        new_compiled_pdf_file_name = os.path.join(article_path, article_tex_name2 + ".pdf")
        cmd = f"ssh {user}@mathdoc-tex cp {compiled_pdf_file_name} {new_compiled_pdf_file_name}"
        utils.execute_cmd(cmd)
        compiled_pdf_file_name = new_compiled_pdf_file_name

    elif replace_frontpage_only:
        # Copy CFG/TEX/PDF to a new name. pdflatex will generate new files, thus preserving existing files
        article_tex_name2 = article_tex_name + "_FP"

        # Remove the files left by a previous front page compilation
        cmd = f"ssh {user}@mathdoc-tex rm -f {os.path.join(article_path, article_tex_name2)}.*"
        utils.execute_cmd(cmd)

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
        article_tex_file_name2 = os.path.join(article_path, article_tex_name2 + ".tex")
        cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name2}"
        utils.execute_cmd(cmd)

        # The cfg is copied as <name>_FP.cfg_SAV: the sed commands below read the _SAV file
        article_cfg_file_name2 = os.path.join(article_path, article_tex_name2 + ".cfg")
        cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name2}_SAV"
        utils.execute_cmd(cmd)

        article_cdrdoidates_file_name = os.path.join(
            article_path, article_tex_name + ".cdrdoidates"
        )
        # The existence of the file is tested locally, the copy is done on mathdoc-tex
        if os.path.isfile(article_cdrdoidates_file_name):
            article_cdrdoidates_file_name2 = os.path.join(
                article_path, article_tex_name2 + ".cdrdoidates"
            )
            cmd = f"ssh {user}@mathdoc-tex cp {article_cdrdoidates_file_name} {article_cdrdoidates_file_name2}"
            utils.execute_cmd(cmd)

        # From now on, work on the _FP files
        article_tex_name = article_tex_name2
        article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
        compiled_pdf_file_name = os.path.join(article_path, article_tex_name + ".pdf")
        final_pdf_file_name = compiled_pdf_file_name + ".new"

    if not skip_compilation:
        # Remove \ItIsPublished from the cfg file
        if lang == "":
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\ItIsPublished//' {article_cfg_file_name}_SAV > {article_cfg_file_name}.1"'''
            utils.execute_cmd(cmd)
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\gdef \\\\\\\\CDRpublished {{true}}//' {article_cfg_file_name}.1 > {article_cfg_file_name}"'''
            utils.execute_cmd(cmd)

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")

        # Save the tex file
        cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name}_SAV"
        utils.execute_cmd(cmd)

        # The tex file is read from the local file system
        lines = read_tex_file(article_tex_file_name)
        new_lines, bib_name = replace_dates_in_tex(
            lines, article, colid, replace_frontpage_only, lang=lang
        )

        # The bibliography was renamed <bib_name>_FP in the tex: create that .bib file
        if bib_name and replace_frontpage_only:
            convert_file_to_utf8(article_path, bib_name + ".bib", bib_name + "_FP.bib")

        f = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", prefix=prefix, delete=False)
        fpath = f.name  # ex: /tmp/Rxsft
        f.write("".join(new_lines))
        f.close()

        # copy to mersenne-tex
        cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"
        utils.execute_cmd(cmd)
        # os.unlink(f.name)

        # recompile article
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        # execute script to compile: the local script is sent to the remote bash on stdin
        cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name}"
        utils.execute_cmd(cmd)

        # Protect the tex file with the 'published' option
        new_lines = protect_tex(new_lines)

        # Create a temporary file. Careful: on ptf-tools, apache does not seem to be
        # allowed to write in /tmp, hence the use of LOG_DIR/tmp/.
        prefix = os.path.join(settings.LOG_DIR, "tmp/")
        f = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", prefix=prefix, delete=False)
        fpath = f.name  # ex: /tmp/Rxsft
        f.write("".join(new_lines))
        f.close()

        # copy to mersenne-tex
        cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"
        utils.execute_cmd(cmd)

    if replace_frontpage_only:
        # At the point the PDF has been recompiled, possibly with a new template
        # Use the 1st page of the new PDF with the other pages of the production PDF
        datastream = article.datastream_set.filter(mimetype="application/pdf").get()
        content_pdf_file_name = os.path.join(
            settings.MERSENNE_PROD_DATA_FOLDER, datastream.location
        )

        replace_front_page(
            article,
            article_tex_name,
            compiled_pdf_file_name,
            content_pdf_file_name,
            final_pdf_file_name,
        )

    # Copy PDF to MERSENNE_TEST_DATA_FOLDER
    datastream = article.datastream_set.filter(mimetype="application/pdf").get()
    to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, datastream.location)
    # remove destination if exists to test if final pdf is really created
    if os.path.exists(to_path):
        os.remove(to_path)
    utils.linearize_pdf(final_pdf_file_name, to_path)

    # if not replace_frontpage_only:
    #     # Add EXIF metadata in the final PDF (replace_front_page already does it)
    #     add_metadata(article, to_path, to_path)

886 

887 

def create_frontpage(
    colid,
    container,
    updated_articles,
    test=True,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """Recompile the front page of the updated articles of an issue.

    Overall flow (the per-article work is delegated to ``compile_article``):
      - locate the directory holding the article TeX sources,
      - add the publication date to the TeX source,
      - recompile the article remotely,
      - replace the PDF of the article in the test data folder.

    Args:
        colid: pid of the collection. "PCJ" is handled by a dedicated path
            (``create_tex_for_pcj`` + ``compile_tex``), whatever ``test`` is.
        container: issue being processed; ``year`` and ``pid`` are read.
        updated_articles: iterable of the articles to process.
        test: when true (the default) no article is compiled; articles are
            still checked for an ojs-id.
        replace_frontpage_only: forwarded to ``compile_article``.
        skip_compilation: forwarded to ``compile_article``.
        lang: language of a translation, "" for the original article.
            Forwarded to ``compile_article``.

    Returns:
        None.

    Raises:
        Exception: if an article has no ojs-id (its TeX folder cannot be
            located). Errors from the helpers propagate unchanged: nothing
            is written to the database here, so there is nothing to roll
            back (a saved PDF may have to be restored by hand).
    """
    # TODO refactor the code and only use compile_tex for all collections
    if colid == "PCJ":
        for article in updated_articles:
            lines = create_tex_for_pcj(article)
            compile_tex(lines, article, update=True)
        return

    # A missing (None) or non-numeric year is treated as year 0.
    try:
        year = int(container.year)
    except (TypeError, ValueError):
        year = 0

    if (
        colid in ["CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL"]
        and year < 2020
        and lang == ""
    ):
        # No front page for Elsevier CRAS
        return

    issue_id = container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue_id)

    for article in updated_articles:
        # The ojs-id is the name of both the article folder and its TeX file.
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

        # Intended flow (translated from the original design note; only the
        # production branch is implemented below):
        # - in test mode, date_pre_publish has been updated but not the other
        #   dates. A temporary XXXX-XX-XX date is only created for
        #   online-first: otherwise, when the online-first version goes to
        #   production, a pdftimestamp file holding XXXX-XX-XX would make
        #   that date appear.
        # - if the container has online-first and the article has no
        #   date_online_first yet, XXXX-XX-XX is used for online-first (an
        #   article that already has an online-first date has presumably
        #   been recompiled already).
        # - when going to production, the dates of the article are used:
        #   date_online_first if the container has online-first, and
        #   date_published if it is set.
        if not test and (article.date_online_first or article.date_published):
            compile_article(
                article,
                colid,
                issue_id,
                article_path,
                article_tex_name,
                replace_frontpage_only,
                skip_compilation,
                lang,
            )

972 

973 

# create_translated_pdf: build the PDF of a translated article on the remote
# TeX host (mathdoc-tex) and fetch it back.
#
# Steps visible in this block:
#   - read the ssh/scp user from settings.MERSENNE_TEX_USER;
#   - resolve the issue TeX folder from the pid of the article's top
#     collection and the pid of its container;
#   - raise Exception if the article has no ojs-id (the ojs-id names both
#     the article folder and the files below);
#   - write xml_content to <LOG_DIR>/tmp/<ojs-id>.xml and scp it into the
#     remote article folder;
#   - scp html_file_name to trad-<lang>.html in the remote article folder;
#   - run bin/translate_article.sh remotely through "ssh ... bash -s";
#   - scp <ojs-id>-<lang>.pdf from the remote article folder to pdf_file_name.
#
# NOTE(review): indentation was lost in this extract, so which of the steps
# following "if not skip_compilation:" are actually guarded by it cannot be
# determined here, and the function may continue past the last visible
# line - confirm against the original file.
# NOTE(review): paths and lang are interpolated unquoted into command
# strings; presumably utils.execute_cmd runs them through a shell - verify
# that none of these values can contain spaces or shell metacharacters.
974def create_translated_pdf( 

975 article, xml_content, lang, pdf_file_name, html_file_name, skip_compilation=False 

976): 

977 user = settings.MERSENNE_TEX_USER 

978 

979 issue_path = resolver.get_cedram_issue_tex_folder( 

980 article.get_top_collection().pid, article.my_container.pid 

981 ) 

982 article_tex_name = article.get_ojs_id() 

983 if not article_tex_name: 

984 raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path") 

985 article_path = os.path.join(issue_path, article_tex_name) 

986 

987 xml_base_name = article_tex_name + ".xml" 

988 local_xml = os.path.join(settings.LOG_DIR, "tmp", xml_base_name) 

989 remote_xml = os.path.join(article_path, xml_base_name) 

990 

991 if not skip_compilation: 

992 # Create the XML file locally 

993 with open(local_xml, "w", encoding="utf-8") as file_: 

994 file_.write(xml_content) 

995 

996 # Copy XML file to mersenne-tex 

997 cmd = f"scp {local_xml} {user}@mathdoc-tex:{remote_xml}" 

998 utils.execute_cmd(cmd) 

999 

1000 remote_html_base_name = f"trad-{lang}.html" 

1001 remote_html = os.path.join(article_path, remote_html_base_name) 

1002 # Copy HTML file to mersenne-tex 

1003 cmd = f"scp {html_file_name} {user}@mathdoc-tex:{remote_html}" 

1004 utils.execute_cmd(cmd) 

1005 

1006 # Create the PDF 

1007 ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin") 

1008 # execute script to compile 

1009 cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/translate_article.sh {article_path} {xml_base_name} {remote_html_base_name} {lang}" 

1010 utils.execute_cmd(cmd) 

1011 

1012 remote_pdf_base_name = f"{article_tex_name}-{lang}.pdf" 

1013 remote_pdf = os.path.join(article_path, remote_pdf_base_name) 

1014 # pdf-traduction should have created remote.pdf 

1015 # Copy the PDF file 

1016 cmd = f"scp {user}@mathdoc-tex:{remote_pdf} {pdf_file_name}" 

1017 utils.execute_cmd(cmd)