Coverage for src/ptf/tex.py: 4%

1import os

2import subprocess

3import tempfile

5import pypdf

6from pylatexenc.latexencode import unicode_to_latex

8from django.conf import settings

10from ptf import model_helpers

11from ptf import models

12from ptf import utils

13from ptf.cmds.xml import xml_utils

14from ptf.cmds.xml.jats.jats_parser import get_tex_from_xml

15from ptf.display import resolver

def get_tex_keyword_date_published(colid, article, lang=""):
    """
    Return the TeX macro that carries the publication date of an article.

    colid: pid of the collection (ex: "AIF", "CRMATH")
    article: object exposing date_online_first
    lang: language of a translation; a non-empty value selects the
          translation macro whatever the collection is
    """
    cr_collections = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")
    # For the CR collections, the choice depends on the online first date
    posted_for_cr = colid in cr_collections and not article.date_online_first

    if lang != "":
        return "\\CDRsetmeta{traduction_date_posted}"
    if posted_for_cr or colid in ("AHL", "AIF", "OGEO", "JTNB"):
        return "\\dateposted"
    return "\\datepublished"

def get_tex_keyword_date_online_first(colid, article, lang=""):
    """
    Return the TeX macro that carries the online first date of an article.

    The macro is the same for every collection, article and language;
    the parameters mirror get_tex_keyword_date_published.
    """
    online_first_macro = "\\dateposted"
    return online_first_macro

def read_tex_file(filename):
    """
    Read a tex file and return its content as a list of lines.

    The file is first decoded as utf-8; if that fails it is read again
    as iso-8859-1.
    An empty list is returned if filename is not an existing file.
    """
    if not os.path.isfile(filename):
        return []

    try:
        with open(filename, encoding="utf-8") as tex_file:
            return tex_file.readlines()
    except UnicodeDecodeError:
        # Not utf-8: fall back to latin-1 below
        pass

    with open(filename, encoding="iso-8859-1") as tex_file:
        return tex_file.readlines()

def convert_file_to_utf8(article_path, from_name, to_name):
    """
    Re-encode a file of the article folder in utf-8 and upload it under a new name.

    <article_path>/<from_name> is read with read_tex_file, written in utf-8
    to a temporary file below settings.LOG_DIR/tmp/, then copied with scp to
    <article_path>/<to_name> on mathdoc-tex.
    The temporary file is not removed.
    """
    content = "".join(read_tex_file(os.path.join(article_path, from_name)))

    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_folder)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_folder, delete=False
    ) as tmp_file:
        tmp_file.write(content)

    # copy to mersenne-tex
    destination = os.path.join(article_path, to_name)
    remote_user = settings.MERSENNE_TEX_USER
    utils.execute_cmd(f"scp {tmp_file.name} {remote_user}@mathdoc-tex:{destination}")

def write_tex_file(filename, lines, create_temp_file=False):
    """
    Write lines (joined without separator) to a utf-8 file and return its path.

    filename: path of the file to write (ignored if create_temp_file is True)
    lines: list of strings
    create_temp_file: if True, the content goes to a new temporary file
                      created below settings.LOG_DIR/tmp/ (not removed afterwards)
    """
    content = "".join(lines)

    if not create_temp_file:
        with open(filename, "w", encoding="utf-8") as tex_file:
            tex_file.write(content)
        return filename

    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_folder)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_folder, delete=False
    ) as tmp_file:
        tmp_file.write(content)
    return tmp_file.name

def insert_date_published(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the publication date macro(s) in new_lines at begin_document_pos.

    Nothing is inserted if the article has no date_published.
    Returns begin_document_pos shifted by the number of inserted lines.
    """
    if article.date_published is None:
        return begin_document_pos

    keyword = get_tex_keyword_date_published(colid, article, lang)
    date_str = article.date_published.strftime("%Y-%m-%d")
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")
    begin_document_pos += 1

    container = article.my_container
    if container is None:
        return begin_document_pos

    is_thematic_issue = len(container.title_html) > 0
    is_issue_finalized = not container.with_online_first

    if (
        is_thematic_issue
        and is_issue_finalized
        and article.date_online_first is None
        and keyword != "\\datepublished"
    ):
        # Finalized thematic issue where the article did not go through online first
        # => Add \datepublished so that "Issue date :" appears in the PDF
        new_lines.insert(begin_document_pos, f"\\datepublished{{{date_str}}}\n")
        begin_document_pos += 1

    return begin_document_pos

112

113

def insert_date_online_first(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the online first date macro in new_lines at begin_document_pos.

    Nothing is inserted if the article has no date_online_first.
    Returns begin_document_pos shifted by the number of inserted lines.
    """
    online_first = article.date_online_first
    if online_first is None:
        return begin_document_pos

    keyword = get_tex_keyword_date_online_first(colid, article, lang)
    date_str = online_first.strftime("%Y-%m-%d")
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")
    return begin_document_pos + 1

122

123

def insert_end_page(new_lines, article, colid, begin_document_pos):
    r"""
    Insert the definition of \cdr@end@page (last page of the article)
    one line after begin_document_pos.

    Nothing is inserted if the article has no lpage.
    begin_document_pos is returned unchanged.
    """
    if not article.lpage:
        return begin_document_pos

    end_page_cmd = "".join(
        ("\\makeatletter\\def\\cdr@end@page{", article.lpage, "}\\makeatother\n")
    )
    new_lines.insert(begin_document_pos + 1, end_page_cmd)
    return begin_document_pos

130

131

def replace_dates_in_tex(lines, article, colid, replace_frontpage_only=False, lang=""):
    r"""
    Add or replace \dateposted and \datepublished in the source TeX.

    lines: list of lines of the source TeX
    article: article providing the dates (and lpage for the front page)
    colid: pid of the collection
    replace_frontpage_only: if True, existing date lines are dropped and
        re-created before \begin{document}, \bibliography{xxx} is renamed
        to \bibliography{xxx_FP} and the last page is declared after
        \begin{document}
    lang: language of a translation ("" for the original article)

    Returns (new_lines, bib_name): the updated list of lines and the name
    found in \bibliography{...} ("" if none was found or if
    replace_frontpage_only is False).
    """
    new_lines = []
    bib_name = ""

    keyword_date_published = get_tex_keyword_date_published(colid, article, lang)
    keyword_date_online_first = get_tex_keyword_date_online_first(colid, article, lang)
    existing_date_macros = (
        "\\datepublished{",
        "\\dateposted{",
        "\\CDRsetmeta{traduction_date_posted}{",
    )
    found_date_online_first = False
    found_date_published = False
    begin_document_pos = -1  # index of \begin{document} in new_lines, -1 if not found

    for line in lines:
        # j is the position of the first character that is not a space or a tab
        j = len(line) - len(line.lstrip(" \t"))

        if j >= len(line) or line[j] == "%":
            # Comment (or line made only of spaces/tabs): keep it as is
            new_lines.append(line)

        elif replace_frontpage_only and line.startswith(existing_date_macros, j):
            # Dropped: the dates are inserted again before \begin{document}
            pass

        elif line.startswith(f"{keyword_date_published}{{", j):
            # replace existing \datepublished
            found_date_published = True
            insert_date_published(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith(f"{keyword_date_online_first}{{", j):
            # replace existing \dateposted
            found_date_online_first = True
            insert_date_online_first(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith("\\begin{document", j):
            # \begin{document}: the missing dates are added here after the loop.
            # The position is taken in new_lines (not in lines) because lines
            # may have been dropped or added before this point.
            begin_document_pos = len(new_lines)
            new_lines.append(line)

        elif line.startswith(("\\documentclass", "{\\documentclass"), j):
            # remove published from \documentclass to allow compilation
            line = (
                line.replace(",published,", ",")
                .replace(",published", "")
                .replace("published", "")
            )
            new_lines.append(line)

        elif line.startswith("\\makeatletter\\def\\cdr@end@page", j):
            # Command to specify the last page (present in the front page)
            # It is dropped here and added again after \begin{document}
            pass

        elif (
            replace_frontpage_only
            and line.startswith("\\bibliography", j)
            and not line.startswith("\\bibliographystyle", j)
        ):
            end = line.find("}")
            if end > 0:
                # 14 is the length of "\bibliography{"
                bib_name = line[j + 14 : end]
                new_lines.append("\\bibliography{" + bib_name + "_FP}\n")
            else:
                # No closing brace on this line: keep the line untouched
                new_lines.append(line)

        else:
            new_lines.append(line)

    if begin_document_pos >= 0 and not found_date_online_first:
        begin_document_pos = insert_date_online_first(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if begin_document_pos >= 0 and not found_date_published:
        begin_document_pos = insert_date_published(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if replace_frontpage_only and begin_document_pos >= 0:
        begin_document_pos = insert_end_page(new_lines, article, colid, begin_document_pos)

    return new_lines, bib_name

229

230

def protect_tex(lines, keyword="published"):
    r"""
    Add keyword to the options of the \documentclass of a cedram TeX source.

    lines: list of lines of the source TeX
    keyword: option to add at the end of the [...] options

    The options may be closed on the \documentclass line or on a following
    line. Only lines that contain "{cedram" are modified; comment lines
    are left untouched.
    Returns a new list of lines.
    """
    protected = []
    options_still_open = False  # \documentclass[ seen, but not yet its "]"

    for line in lines:
        indent = len(line) - len(line.lstrip(" \t"))
        is_comment_or_blank = indent >= len(line) or line[indent] == "%"

        if not is_comment_or_blank:
            is_cedram = line.find("{cedram") > 0  # Ignore {article}
            closing = line.find("]")

            if line.startswith(("\\documentclass", "{\\documentclass"), indent):
                # add published to \documentclass after compilation
                if closing > 0:
                    if is_cedram:
                        line = line[0:closing] + "," + keyword + line[closing:]
                else:
                    options_still_open = True

            elif options_still_open:
                if closing == indent:
                    # "]" starts the line: the keyword goes on its own line before
                    if is_cedram:
                        protected.append(f",{keyword}\n")
                    options_still_open = False
                elif closing > -1:
                    if is_cedram:
                        line = line[0:closing] + "," + keyword + line[closing:]
                    options_still_open = False

        protected.append(line)

    return protected

268

269

def get_tex_corresponding_emails(author_contributions):
    r"""
    Return the emails of the corresponding authors, converted to LaTeX.

    Contributions that are not corresponding or that have no email are skipped.
    The "\_" produced by the LaTeX conversion is turned back into "_".
    """
    return [
        unicode_to_latex(contribution.email).replace(r"\_", r"_")
        for contribution in author_contributions
        if contribution.corresponding and contribution.email
    ]

278

279

def get_tex_authors(author_contributions):
    r"""
    Return the TeX lines that declare the authors.

    For each contribution:
        \author{\firstname{Antoine} \lastname{Lavoisier}}
            (+ \CDRorcid{...}, \IsEqualContrib, \dead inside \author if applicable)
        \address{Rue sans aplomb, Paris, France}   (one line per address)
        \email{a-lavois@lead-free-univ.edu}        (corresponding authors only)
    followed by an empty line.
    """
    tex_lines = []

    for contribution in author_contributions:
        first_name = unicode_to_latex(contribution.first_name)
        last_name = unicode_to_latex(contribution.last_name)

        author_parts = [f"\\author{{\\firstname{{{first_name}}} \\lastname{{{last_name}}}"]
        if contribution.orcid:
            author_parts.append(f"\\CDRorcid{{{contribution.orcid}}}")
        if contribution.equal_contrib:
            author_parts.append("\\IsEqualContrib")
        if contribution.deceased_before_publication:
            author_parts.append("\\dead")
        author_parts.append("}\n")
        tex_lines.append("".join(author_parts))

        tex_lines.extend(
            f"\\address{{{unicode_to_latex(contribaddress.address)}}}\n"
            for contribaddress in contribution.contribaddress_set.all()
        )

        if contribution.corresponding and len(contribution.email) > 0:
            tex_lines.append(f"\\email{{{unicode_to_latex(contribution.email)}}}\n")

        tex_lines.append("\n")

    return tex_lines

311

312

def create_tex_for_pcj(article):
    r"""
    Build the TeX source of the front page of a PCJ article.

    The source declares the issue, the DOIs, the PCI section, the optional
    conference, the corresponding emails, the title, the authors, the first
    abstract and the publication date, then includes the PDF of the article
    (article_<pid>.pdf) with \PCIincludepdf.

    Returns the list of lines of the TeX file.
    """
    pci = article.get_pci_section()

    extid = model_helpers.get_extid(article, "rdoi")
    rdoi = extid.id_value if extid is not None else ""

    lines = [
        "\\documentclass[PCJ,Unicode,screen,Recup]{cedram}\n",
        "\\usepackage{pax}\n",
        "\\usepackage{mathrsfs}\n\n",
        "\\issueinfo{"
        + article.my_container.volume
        + "}{}{}{"
        + article.my_container.year
        + "}\n",
        f"\\renewcommand*{{\\thearticle}}{{{article.article_number}}}\n",
        f"\\DOI{{{article.doi}}}\n",
        f"\\RDOI{{{rdoi}}}\n",
        f"\\setPCI{{{pci}}}\n",
        f"\\CDRsetmeta{{articletype}}{{{article.atype}}}\n",
    ]

    conf = article.get_conference()
    if len(conf) > 0:
        lines.append(f"\\setPCIconf{{{conf}}}\n")

    author_contributions = article.get_author_contributions()

    for email in get_tex_corresponding_emails(author_contributions):
        lines.append(f"\\PCIcorresp{{{email}}}\n")

    lines.append("\n")

    # \title[Sample for the template]{Sample for the template, with quite a very long title}
    # The HTML tags of the title are replaced with markers before the LaTeX
    # conversion, then the markers are replaced with the TeX macros.
    tag_macros = (
        ("i", "\\protect\\emph{"),
        ("sup", "\\protect\\textsuperscript{"),
        ("sub", "\\protect\\textsubscript{"),
    )
    title = article.title_tex
    for tag, _ in tag_macros:
        title = title.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")
    title = unicode_to_latex(title)
    for tag, macro in tag_macros:
        title = title.replace(f"|||{tag}|||", macro).replace(f"|||/{tag}|||", "}")
    lines.append(f"\\title{{{title}}}\n")
    lines.append("\n")
    lines.extend(get_tex_authors(author_contributions))

    # No keywords for PCJ

    abstracts = article.get_abstracts()
    if len(abstracts) > 0:
        abstract = abstracts.first()
        # The conversion of the abstract (XML => TeX) is done by get_tex_from_xml
        value = get_tex_from_xml(abstract.value_xml, "abstract", for_tex_file=True)

        lines.append("\\begin{abstract}\n")
        lines.append(value + "\n")
        lines.append("\\end{abstract}\n")

    # "AAAA-MM-DD" is written when the article has no publication date yet
    date_ = article.date_published.strftime("%Y-%m-%d") if article.date_published else "AAAA-MM-DD"
    keyword = get_tex_keyword_date_published("PCJ", article)
    lines.append(f"{keyword}{{{date_}}}\n")

    lines.append("\\begin{document}\n")
    lines.append("\\maketitle\n")
    article_pdf = f"article_{article.pid}.pdf"
    lines.append(f"\\PCIincludepdf{{{article_pdf}}}\n")

    lines.append("\\end{document}\n")

    return lines

431

432

def compile_tex(lines, article, update=False):
    """
    1) Create a tex file from the list of lines
    2) Upload the file to mathdoc-tex (+ the pdf for PCJ)
    3) Compile the file
    4) Replace the pdf in /mersenne_test_data
    5) linearize the pdf
    TODO: merge ptf_tools/views create_frontpage (not done while PCJ is unstable to avoid compilation bugs in prod)

    lines: list of lines of the tex file
    article: article to compile
    update: if False (PCJ only), the article folder is created on mathdoc-tex
            and the pdf of the article is uploaded first

    Returns the path of the pdf in settings.MERSENNE_TEST_DATA_FOLDER,
    or None if the site is not ptf_tools.
    Raises an Exception if a non PCJ article has no ojs-id.
    """

    # Only allowed on ptf-tools
    if settings.SITE_NAME != "ptf_tools":
        return

    remote = f"{settings.MERSENNE_TEX_USER}@mathdoc-tex"
    issue = article.my_container
    colid = issue.my_collection.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue.pid)
    article_pdf = ""

    if colid == "PCJ":
        article_tex_name = article.pid
        article_path = os.path.join(issue_path, article_tex_name)
        article_pdf = f"article_{article.pid}.pdf"

        if not update:
            # Create the article folder
            utils.execute_cmd(f"ssh {remote} mkdir -p {article_path}")

            # copy the pdf to mersenne-tex
            relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
            folder = os.path.join(settings.RESOURCES_ROOT, relative_folder)
            pdf_file_name = os.path.join(folder, article.pid + ".pdf")
            utils.execute_cmd(f"scp {pdf_file_name} {remote}:{article_path}/{article_pdf}")
    else:
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

    article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
    fpath = write_tex_file("", lines, create_temp_file=True)

    # copy to mersenne-tex (the temporary file is not removed)
    utils.execute_cmd(f"scp {fpath} {remote}:{article_tex_file_name}")

    # recompile article: execute the script on mathdoc-tex
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(
        f"ssh {remote} 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name} {colid} {article_pdf}"
    )

    # replace pdf
    cedram_pdf_location = os.path.join(article_path, article_tex_name + ".pdf")
    relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
    to_path = os.path.join(
        settings.MERSENNE_TEST_DATA_FOLDER, relative_folder, article.pid + ".pdf"
    )
    if settings.MERSENNE_CREATE_FRONTPAGE:
        utils.linearize_pdf(cedram_pdf_location, to_path)

    return to_path

500

501

def add_outline(reader, writer, outlines, parent=None):
    """
    Copy the outline (table of contents) of a PDF reader into a PDF writer.

    reader: pypdf reader the outline items come from
    writer: pypdf writer that receives the outline items
    outlines: list of outline items; a nested list holds the children of
              the item that precedes it
    parent: outline item of the writer under which the items are added
            (None for the top level)
    """
    last_added = parent
    for item in outlines:
        if isinstance(item, list):
            # Children of the item added just before
            add_outline(reader, writer, item, last_added)
            continue

        title = item["/Title"]
        page_num = reader.get_destination_page_number(item)

        # parent and fit are passed by keyword: the positional order of the
        # optional parameters of add_outline_item differs between releases
        # (a "before" parameter comes right after "parent" in pypdf)
        if item["/Type"] == "/XYZ":
            fit = pypdf.generic.Fit("/XYZ", (item["/Left"], item["/Top"], 1))
            last_added = writer.add_outline_item(title, page_num, parent=parent, fit=fit)
        else:
            last_added = writer.add_outline_item(title, page_num, parent=parent)

523

524

def test():
    """
    Manual check of the merge of a front page with the content of a PDF.

    Works on hard-coded local files: the first page of test_FP.pdf and the
    pages (but the first) of test_content.pdf are written to test_merged.pdf
    with the outline and the named destinations of the content, then the
    metadata are added using the first article of the database.

    The function ends the interpreter (SystemExit) once done.
    """
    local_fp_pdf = "/home/touvierj/Bureau/test_FP.pdf"
    local_content_pdf = "/home/touvierj/Bureau/test_content.pdf"
    merged_pdf = "/home/touvierj/Bureau/test_merged.pdf"

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: only the first page
    for index, page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(page)

    # Content: all the pages but the first
    for index, page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Add metadata to the PDF, including EXIF data
    add_metadata(models.Article.objects.first(), local_content_pdf, merged_pdf)

    raise SystemExit

581

582

def add_metadata(article, in_pdf, out_pdf):
    """
    Add metadata to a PDF with exiftool.

    The tags of in_pdf are copied to out_pdf (-tagsFromFile), then the
    title/author found in the metadata of in_pdf and the metadata of the
    article (language, publisher, DOI, keywords, volume, number, ISSN...)
    are set. If in_pdf and out_pdf are the same, the file is modified in place.

    exiftool is called with a list of arguments (no shell): values with
    quotes or other special characters are passed as they are.

    Returns the output of exiftool (bytes).
    Raises subprocess.CalledProcessError if exiftool fails.
    """
    reader = pypdf.PdfReader(in_pdf, strict=False)

    # reader.metadata can be None if the PDF has no document information
    metadata = reader.metadata or {}

    args = ["exiftool", "-tagsFromFile", in_pdf]
    if in_pdf == out_pdf:
        args.append("-overwrite_original_in_place")

    container = article.my_container
    collection = article.get_collection()

    _, kwds, _ = article.get_kwds_by_type()
    keywords = ", ".join(str(kwd.value) for kwd in kwds)

    # Only French and English are mapped to a locale
    lang = {"fr": "fr-FR", "en": "en-GB"}.get(article.lang, "")

    for tag in ("Title", "Author"):
        key = f"/{tag}"
        if key in metadata:
            args.append(f"-{tag}={metadata[key]}")

    args.append("-Creator=Centre Mersenne")
    args.append("-Subject=")  # an empty value removes the tag
    if lang:
        args.append(f"-xmp-dc:Language={lang}")
    args.append(f"-xmp-dc:publisher={container.my_publisher.pub_name}")
    args.append(f"-xmp-prism:DOI={article.doi}")
    args.append(f"-Keywords={keywords}")
    args.append(f"-xmp-xmp:Keywords={keywords}")
    args.append(f"-xmp-pdf:Keywords={keywords}")
    args.append("-xmp-pdf:Copyright=© The author(s)")

    if container.volume:
        args.append(f"-xmp-prism:Volume={container.volume}")
    if container.number:
        args.append(f"-xmp-prism:Number={container.number}")
    if collection.issn:
        args.append(f"-xmp-prism:ISSN={collection.issn}")
    if collection.e_issn:
        args.append(f"-xmp-prism:EISSN={collection.e_issn}")
    if container.title_tex:
        args.append(f"-xmp-prism:IssueName={container.title_tex}")
    args.append(out_pdf)

    return subprocess.check_output(args)

643

644

def replace_front_page(
    article, article_tex_name, fp_pdf_file_name, content_pdf_file_name, final_pdf_file_name
):
    """
    Build a PDF made of the 1st page of the recompiled PDF and the other
    pages of the existing PDF, then upload it to mathdoc-tex.

    article: article of the PDF (used for the metadata and the first page number)
    article_tex_name: base name of the temporary files created in settings.LOG_DIR/tmp/
    fp_pdf_file_name: path on mathdoc-tex of the PDF with the new front page
    content_pdf_file_name: local path of the PDF with the content to keep
    final_pdf_file_name: path on mathdoc-tex of the PDF to create
    """
    # At the point the PDF has been recompiled, possibly with a new template
    # Use the 1st page of the new PDF with the other pages of the .pdf_SAV

    remote = f"{settings.MERSENNE_TEX_USER}@mathdoc-tex"
    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")

    # Copy the PDF files locally (pypdf is installed in ptf-tools)
    local_fp_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_FP")
    utils.execute_cmd(f"scp {remote}:{fp_pdf_file_name} {local_fp_pdf}")

    local_content_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_content")
    utils.execute_cmd(f"cp {content_pdf_file_name} {local_content_pdf}")

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # New front page: only the first page
    for index, page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(page)

    # Existing content: all the pages but the first
    for index, page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    merged_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_merged")
    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Compiled PDF are sometimes buggy (wrong xref table). Use pdftk to fix the file.
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(f"{ptf_tools_bin}/update_pdf.sh {local_content_pdf} {merged_pdf}")

    # Add metadata to the PDF, including EXIF data
    add_metadata(article, local_content_pdf, merged_pdf)

    # Without a first page number, there are no page labels to fix:
    # the merged PDF is the one to upload
    pdf_to_upload = merged_pdf

    # pypdf creates a PDF that starts on page 1, fix it
    if article.fpage:
        try:
            first_page = int(article.fpage)
            label_style = "/D"
        except ValueError:
            # Not a decimal number: the page is expected to be a roman numeral
            first_page = xml_utils.roman_to_int(article.fpage)
            label_style = "/r"

        reader = pypdf.PdfReader(merged_pdf)
        writer = pypdf.PdfWriter()
        page_count = 0
        for page in reader.pages:
            page_count += 1
            writer.add_page(page)

        writer.set_page_label(
            page_index_from=0,
            page_index_to=page_count - 1,
            start=first_page,
            style=label_style,
        )
        local_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf")
        writer.write(local_pdf)
        writer.close()
        pdf_to_upload = local_pdf

    # copy to mersenne-tex
    utils.execute_cmd(f"scp {pdf_to_upload} {remote}:{final_pdf_file_name}")

727

728

def compile_article(
    article,
    colid,
    issue_id,
    article_path,
    article_tex_name,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Recompile an article on mathdoc-tex with its dates, then copy the
    linearized PDF to settings.MERSENNE_TEST_DATA_FOLDER.

    article: article to compile
    colid: pid of the collection
    issue_id: pid of the issue (not used in this function)
    article_path: folder of the article sources
    article_tex_name: base name of the tex file ("-<lang>" is added for a translation)
    replace_frontpage_only: only the front page of the existing PDF is replaced;
                            the compilation is done on copies named <name>_FP.*
    skip_compilation: the tex file is neither modified nor compiled
    lang: language of a translation ("" for the original article)
    """
    remote = f"{settings.MERSENNE_TEX_USER}@mathdoc-tex"

    def run_remote(command):
        # Execute a command on mathdoc-tex
        utils.execute_cmd(f"ssh {remote} {command}")

    def upload(tex_lines, remote_path):
        # Write the lines to a local temporary file (not removed), then scp it
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", prefix=prefix, delete=False
        ) as tmp_file:
            tmp_file.write("".join(tex_lines))
        utils.execute_cmd(f"scp {tmp_file.name} {remote}:{remote_path}")

    if lang != "":
        article_tex_name += "-" + lang

    article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
    # Regular compilation: compiled_pdf and final_pdf are the same
    # recompilation of the front page: compiled_pdf is the entire pdf with the new front page
    #                                  final_pdf is the pdf after the merge (new front page; old content)
    compiled_pdf_file_name = os.path.join(article_path, article_tex_name + ".pdf")
    final_pdf_file_name = compiled_pdf_file_name
    sav_pdf_file_name = compiled_pdf_file_name + "_SAV"

    # Save the pdf file
    run_remote(f"cp {compiled_pdf_file_name} {sav_pdf_file_name}")

    # Save the cfg file (no cfg for translations)
    if lang == "":
        run_remote(f"cp {article_cfg_file_name} {article_cfg_file_name}_SAV")

    # Folder of the temporary files
    # (warning: on ptf-tools, apache may not be allowed to write in /tmp)
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    if replace_frontpage_only:
        fp_tex_name = article_tex_name + "_FP"
        fp_base = os.path.join(article_path, fp_tex_name)

        if skip_compilation:
            # We want to update the front page without compiling the tex:
            # We copy the original <article_tex_name>.PDF to <article_tex_name>_FP.PDF
            fp_pdf_file_name = fp_base + ".pdf"
            run_remote(f"cp {compiled_pdf_file_name} {fp_pdf_file_name}")
            compiled_pdf_file_name = fp_pdf_file_name
        else:
            # Copy CFG/TEX/PDF to a new name. pdflatex will generate new files, thus preserving existing files
            run_remote(f"rm -f {fp_base}.*")

            source_tex = os.path.join(article_path, article_tex_name + ".tex")
            run_remote(f"cp {source_tex} {fp_base + '.tex'}")

            run_remote(f"cp {article_cfg_file_name} {fp_base + '.cfg'}_SAV")

            source_cdrdoidates = os.path.join(article_path, article_tex_name + ".cdrdoidates")
            if os.path.isfile(source_cdrdoidates):
                run_remote(f"cp {source_cdrdoidates} {fp_base + '.cdrdoidates'}")

            # From now on, work on the _FP files
            article_tex_name = fp_tex_name
            article_cfg_file_name = fp_base + ".cfg"
            compiled_pdf_file_name = fp_base + ".pdf"
            final_pdf_file_name = compiled_pdf_file_name + ".new"

    if not skip_compilation:
        # Remove \ItIsPublished from the cfg file
        if lang == "":
            cmd = f'''ssh {remote} "sed 's/\\\\\\\\ItIsPublished//' {article_cfg_file_name}_SAV > {article_cfg_file_name}.1"'''
            utils.execute_cmd(cmd)
            cmd = f'''ssh {remote} "sed 's/\\\\\\\\gdef \\\\\\\\CDRpublished {{true}}//' {article_cfg_file_name}.1 > {article_cfg_file_name}"'''
            utils.execute_cmd(cmd)

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")

        # Save the tex file
        run_remote(f"cp {article_tex_file_name} {article_tex_file_name}_SAV")

        new_lines, bib_name = replace_dates_in_tex(
            read_tex_file(article_tex_file_name), article, colid, replace_frontpage_only, lang=lang
        )

        if bib_name and replace_frontpage_only:
            convert_file_to_utf8(article_path, bib_name + ".bib", bib_name + "_FP.bib")

        # copy to mersenne-tex
        upload(new_lines, article_tex_file_name)

        # recompile article: execute the script on mathdoc-tex
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        utils.execute_cmd(
            f"ssh {remote} 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name}"
        )

        # Protect the tex file with the 'published' option, then copy it to mersenne-tex
        new_lines = protect_tex(new_lines)
        upload(new_lines, article_tex_file_name)

    if replace_frontpage_only:
        # At the point the PDF has been recompiled, possibly with a new template
        # Use the 1st page of the new PDF with the other pages of the production PDF
        datastream = article.datastream_set.filter(mimetype="application/pdf").get()
        content_pdf_file_name = os.path.join(
            settings.MERSENNE_PROD_DATA_FOLDER, datastream.location
        )

        replace_front_page(
            article,
            article_tex_name,
            compiled_pdf_file_name,
            content_pdf_file_name,
            final_pdf_file_name,
        )

    # Copy PDF to MERSENNE_TEST_DATA_FOLDER
    datastream = article.datastream_set.filter(mimetype="application/pdf").get()
    to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, datastream.location)
    # remove destination if exists to test if final pdf is really created
    if os.path.exists(to_path):
        os.remove(to_path)
    utils.linearize_pdf(final_pdf_file_name, to_path)

    # NOTE: the EXIF metadata are only added by replace_front_page;
    # they are not added to the final PDF of a regular compilation.

886

887

def create_frontpage(
    colid,
    container,
    updated_articles,
    test=True,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Recompile the articles of an issue so that their front page is regenerated.

    Original author's description of the flow:
      - get the directory of the article sources: cedram_dev/production/ ..
      - add the publication date in the source TeX
      - remote execute latexmk -pdf article.pdf
      - replace the pdf of the article on mersenne_test_data

    :param colid: collection id. "PCJ" is handled separately through
        create_tex_for_pcj / compile_tex.
    :param container: issue; its ``year`` and ``pid`` attributes are read.
    :param updated_articles: iterable of articles to process.
    :param test: when true, no article is compiled by the generic (non PCJ) path.
    :param replace_frontpage_only: forwarded to compile_article.
    :param skip_compilation: forwarded to compile_article.
    :param lang: forwarded to compile_article; an empty string means "not a translation".
    :raises Exception: if an article has no ojs-id (needed to build its cedram tex path).
    """
    # TODO refactor the code and only use compile_tex for all collections
    if colid == "PCJ":
        for pcj_article in updated_articles:
            compile_tex(create_tex_for_pcj(pcj_article), pcj_article, update=True)
        return

    # A year that is not a plain integer is treated as 0
    try:
        year = int(container.year)
    except ValueError:
        year = 0

    cr_collections = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")
    if colid in cr_collections and year < 2020 and lang == "":
        # No front page for Elsevier CRAS
        return

    issue_id = container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue_id)

    # No rollback is needed if something fails below: nothing is modified in the database
    # (at most a saved copy of the PDF could be put back in place).
    for article in updated_articles:
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

        # Original author's notes on the intended flow:
        # - in test mode:
        #     date_pre_publish has been updated, but not the other dates.
        #     A temporary date such as XXXX-XX-XX is only created for online_first
        #     (otherwise, when the online_first goes to prod, the presence of the pdftimestamp
        #     file with XXXX-XX-XX would make that date appear).
        #     - if article.my_container.with_online_first and there is no
        #       article.date_online_first: put XXXX-xx-xx for online first
        #       (if the article already has an online-first date, it has presumably
        #       already been recompiled)
        # - when going to prod, the dates of the article are used:
        #     if container.with_online_first: article.date_online_first in the right file
        #     if article.date_published: update the corresponding file
        if test:
            continue

        has_publication_date = article.date_online_first or article.date_published
        if has_publication_date:
            compile_article(
                article,
                colid,
                issue_id,
                article_path,
                article_tex_name,
                replace_frontpage_only,
                skip_compilation,
                lang,
            )

972

973

def create_translated_pdf(
    article, xml_content, lang, pdf_file_name, html_file_name, skip_compilation=False
):
    """
    Fetch the PDF of a translated article from the mathdoc-tex host,
    compiling it there first unless skip_compilation is set.

    Unless skip_compilation is true:
      - xml_content is written to <LOG_DIR>/tmp/<ojs-id>.xml and copied to the article folder
        on mathdoc-tex
      - html_file_name is copied there as trad-<lang>.html
      - bin/translate_article.sh is executed remotely through ssh

    In all cases, <ojs-id>-<lang>.pdf is then copied from the remote article folder
    to pdf_file_name.

    :raises Exception: if the article has no ojs-id (needed to build its cedram tex path).
    """
    user = settings.MERSENNE_TEX_USER

    collection_pid = article.get_top_collection().pid
    issue_pid = article.my_container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(collection_pid, issue_pid)

    article_tex_name = article.get_ojs_id()
    if not article_tex_name:
        raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
    article_path = os.path.join(issue_path, article_tex_name)

    remote_host = f"{user}@mathdoc-tex"

    xml_base_name = f"{article_tex_name}.xml"
    local_xml = os.path.join(settings.LOG_DIR, "tmp", xml_base_name)
    remote_xml = os.path.join(article_path, xml_base_name)

    if not skip_compilation:
        # Create the XML file locally
        with open(local_xml, "w", encoding="utf-8") as xml_file:
            xml_file.write(xml_content)

        # Copy the XML file to mersenne-tex
        utils.execute_cmd(f"scp {local_xml} {remote_host}:{remote_xml}")

        # Copy the HTML file to mersenne-tex
        remote_html_base_name = f"trad-{lang}.html"
        remote_html = os.path.join(article_path, remote_html_base_name)
        utils.execute_cmd(f"scp {html_file_name} {remote_host}:{remote_html}")

        # Create the PDF: the local script is fed to a remote bash through stdin
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        compile_cmd = (
            f"ssh {remote_host} 'bash -s' -- < {ptf_tools_bin}/translate_article.sh "
            f"{article_path} {xml_base_name} {remote_html_base_name} {lang}"
        )
        utils.execute_cmd(compile_cmd)

    # The remote compilation is expected to have created the translated PDF: copy it back
    remote_pdf = os.path.join(article_path, f"{article_tex_name}-{lang}.pdf")
    utils.execute_cmd(f"scp {remote_host}:{remote_pdf} {pdf_file_name}")