Coverage for src/ptf/tex.py: 4%
546 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1import os
2import subprocess
3import tempfile
5import pypdf
6from pylatexenc.latexencode import unicode_to_latex
8from django.conf import settings
10from ptf import model_helpers
11from ptf import models
12from ptf import utils
13from ptf.cmds.xml import xml_utils
14from ptf.cmds.xml.jats.jats_parser import get_tex_from_xml
15from ptf.display import resolver
def get_tex_keyword_date_published(colid, article, lang=""):
    """
    Return the TeX command that carries the publication date of an article.

    colid is the pid of the collection, article only needs a date_online_first attribute,
    lang is the language of a translation ("" for the original article).
    """
    posted_collections = ("AHL", "AIF", "OGEO", "JTNB")
    cras_collections = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")

    if colid in posted_collections:
        command = "\\dateposted"
    else:
        command = "\\datepublished"

    # CRAS articles that did not go through online first use \dateposted
    if colid in cras_collections and not article.date_online_first:
        command = "\\dateposted"

    # A translation always wins over the collection rules
    if lang != "":
        command = "\\CDRsetmeta{traduction_date_posted}"

    return command
def get_tex_keyword_date_online_first(colid, article, lang=""):
    """
    Return the TeX command that carries the online-first date of an article.

    The arguments are accepted for symmetry with get_tex_keyword_date_published
    but are not used: the command is the same in every case.
    """
    command = "\\dateposted"
    return command
def read_tex_file(filename):
    """
    Read a tex file and return its content as a list of lines.

    The file is decoded as utf-8 first, then as iso-8859-1 if the utf-8 decoding fails.
    An empty list is returned when filename is not an existing file.
    """
    if not os.path.isfile(filename):
        return []

    try:
        with open(filename, encoding="utf-8") as tex_file:
            return tex_file.readlines()
    except UnicodeDecodeError:
        # Not utf-8: fall through to the legacy encoding
        pass

    with open(filename, encoding="iso-8859-1") as tex_file:
        return tex_file.readlines()
def convert_file_to_utf8(article_path, from_name, to_name):
    """
    Re-encode a file of an article folder in utf-8 and copy it to mathdoc-tex.

    article_path/from_name is read with read_tex_file, written as utf-8 in a temporary
    file below settings.LOG_DIR/tmp/, then copied with scp to article_path/to_name
    on mathdoc-tex. The temporary file is left on disk.
    """
    source_lines = read_tex_file(os.path.join(article_path, from_name))
    user = settings.MERSENNE_TEX_USER

    # write_tex_file creates the tmp folder and the utf-8 temporary file
    tmp_path = write_tex_file("", source_lines, create_temp_file=True)

    # copy to mersenne-tex
    remote_path = os.path.join(article_path, to_name)
    utils.execute_cmd(f"scp {tmp_path} {user}@mathdoc-tex:{remote_path}")
def write_tex_file(filename, lines, create_temp_file=False):
    """
    Write a list of lines in a utf-8 file and return the path of the file written.

    With create_temp_file, filename is ignored: the lines go to a new temporary file
    created below settings.LOG_DIR/tmp/ (the file is not deleted on close).
    Otherwise filename is created or overwritten.
    """
    if not create_temp_file:
        with open(filename, "w", encoding="utf-8") as tex_file:
            tex_file.write("".join(lines))
        return filename

    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_folder)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_folder, delete=False
    ) as tmp_file:
        tmp_file.write("".join(lines))
        return tmp_file.name  # ex: /tmp/Rxsft
def insert_date_published(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the publication date of the article in new_lines at begin_document_pos.

    Nothing is inserted when the article has no date_published.
    For a finalized thematic issue whose article did not go through online first,
    a second line with the datepublished command is added (unless it is already
    the command used for the first line).

    Returns the position shifted by the number of lines inserted.
    """
    if article.date_published is None:
        return begin_document_pos

    keyword = get_tex_keyword_date_published(colid, article, lang)
    iso_date = article.date_published.strftime("%Y-%m-%d")
    new_lines.insert(begin_document_pos, f"{keyword}{{{iso_date}}}\n")
    begin_document_pos += 1

    container = article.my_container
    if container is None:
        return begin_document_pos

    is_thematic_issue = len(container.title_html) > 0
    is_issue_finalized = not container.with_online_first

    if (
        is_thematic_issue
        and is_issue_finalized
        and article.date_online_first is None
        and keyword != "\\datepublished"
    ):
        # Finalized thematic issue where the article did not go through online first
        # => Add \datepublished so that "Issue date :" appears in the PDF
        new_lines.insert(begin_document_pos, f"\\datepublished{{{iso_date}}}\n")
        begin_document_pos += 1

    return begin_document_pos
def insert_date_online_first(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the online-first date of the article in new_lines at begin_document_pos.

    Nothing is inserted when the article has no date_online_first.
    Returns the position shifted by the number of lines inserted (0 or 1).
    """
    online_first = article.date_online_first
    if online_first is None:
        return begin_document_pos

    keyword = get_tex_keyword_date_online_first(colid, article, lang)
    iso_date = online_first.strftime("%Y-%m-%d")
    new_lines.insert(begin_document_pos, f"{keyword}{{{iso_date}}}\n")
    return begin_document_pos + 1
def insert_end_page(new_lines, article, colid, begin_document_pos):
    """
    Insert the definition of the last page of the article one line after begin_document_pos.

    Nothing is inserted when the article has no lpage. colid is not used.
    The position is returned unchanged.
    """
    if not article.lpage:
        return begin_document_pos

    end_page_line = "".join(
        ("\\makeatletter\\def\\cdr@end@page{", article.lpage, "}\\makeatother\n")
    )
    new_lines.insert(begin_document_pos + 1, end_page_line)
    return begin_document_pos
def replace_dates_in_tex(lines, article, colid, replace_frontpage_only=False, lang=""):
    r"""
    Add or replace \dateposted and \datepublished in the source Tex.

    lines is a list of lines of the source Tex. Comment lines are copied untouched.
    For the other lines:
      - an existing date command is replaced by the date of the article
        (with replace_frontpage_only, every date command is removed instead and
        the dates are added again just before \begin{document})
      - "published" is removed from \documentclass to allow the compilation
      - the \cdr@end@page definition is removed (with replace_frontpage_only it is
        added again after \begin{document})
      - with replace_frontpage_only, \bibliography{name} becomes \bibliography{name_FP}

    Dates that were not found in the source are inserted just before \begin{document}.

    Returns a tuple (new_lines, bib_name). bib_name is "" unless a \bibliography
    command has been renamed.
    """
    new_lines = []
    bib_name = ""

    keyword_date_published = get_tex_keyword_date_published(colid, article, lang)
    keyword_date_online_first = get_tex_keyword_date_online_first(colid, article, lang)
    found_date_online_first = False
    found_date_published = False
    begin_document_pos = -1

    # Date commands removed from the source when only the front page is recompiled
    frontpage_date_prefixes = (
        "\\datepublished{",
        "\\dateposted{",
        "\\CDRsetmeta{traduction_date_posted}{",
    )

    for line in lines:
        # Index of the first character that is neither a space nor a tab
        j = len(line) - len(line.lstrip(" \t"))

        if j >= len(line) or line[j] == "%":
            # Comment (or whitespace-only) line: nothing to analyse
            new_lines.append(line)
            continue

        if replace_frontpage_only and line.startswith(frontpage_date_prefixes, j):
            # Dropped here; the dates are inserted before \begin{document} after the loop
            continue

        if line.startswith(f"{keyword_date_published}{{", j):
            # replace existing \datepublished
            found_date_published = True
            insert_date_published(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith(f"{keyword_date_online_first}{{", j):
            # replace existing \dateposted
            found_date_online_first = True
            insert_date_online_first(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith("\\begin{document", j):
            # Remember where \begin{document} ends up in new_lines (not in lines):
            # lines dropped or added above would otherwise shift the insertions below.
            begin_document_pos = len(new_lines)
            new_lines.append(line)

        elif line.startswith(("\\documentclass", "{\\documentclass"), j):
            # remove published from \documentclass to allow compilation
            new_lines.append(
                line.replace(",published,", ",").replace(",published", "").replace("published", "")
            )

        elif line.startswith("\\makeatletter\\def\\cdr@end@page", j):
            # Command to specify the last page (present in the front page)
            # Move it after \begin{document}
            continue

        elif (
            replace_frontpage_only
            and line.startswith("\\bibliography", j)
            and not line.startswith("\\bibliographystyle", j)
        ):
            end = line.find("}")
            if end > 0:
                # 14 == len("\\bibliography{")
                bib_name = line[j + 14 : end]
                new_lines.append("\\bibliography{" + bib_name + "_FP}\n")
            else:
                # No closing brace on this line: keep the command as it is
                new_lines.append(line)

        else:
            new_lines.append(line)

    # \begin{document} add dates if not present
    if begin_document_pos > 0 and not found_date_online_first:
        begin_document_pos = insert_date_online_first(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if begin_document_pos > 0 and not found_date_published:
        begin_document_pos = insert_date_published(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if replace_frontpage_only and begin_document_pos > 0:
        begin_document_pos = insert_end_page(new_lines, article, colid, begin_document_pos)

    return new_lines, bib_name
def protect_tex(lines, keyword="published"):
    r"""
    Add keyword to the options of the \documentclass of a cedram tex file.

    lines is a list of lines of the source Tex; a new list is returned.
    Comment lines and documents that do not use a {cedram...} class are left unchanged.
    The options may be on the \documentclass line or spread over the following lines.
    """
    protected = []
    inside_documentclass = False  # True until the "]" of a multi-line option list is found

    for line in lines:
        indent = len(line) - len(line.lstrip(" \t"))
        is_code = indent < len(line) and line[indent] != "%"  # the line is not a comment

        if is_code and line.startswith(("\\documentclass", "{\\documentclass"), indent):
            # add published to \documentclass after compilation
            bracket = line.find("]")
            if bracket > 0:
                if line.find("{cedram") > 0:  # Ignore {article}
                    line = "".join((line[:bracket], ",", keyword, line[bracket:]))
            else:
                inside_documentclass = True

        elif is_code and inside_documentclass:
            bracket = line.find("]")
            if bracket == indent:
                # The line starts with "]": the keyword goes on a line of its own
                if line.find("{cedram") > 0:  # Ignore {article}
                    protected.append(f",{keyword}\n")
                inside_documentclass = False
            elif bracket > -1:
                if line.find("{cedram") > 0:  # Ignore {article}
                    line = "".join((line[:bracket], ",", keyword, line[bracket:]))
                inside_documentclass = False

        protected.append(line)

    return protected
def get_tex_corresponding_emails(author_contributions):
    """
    Return the LaTeX-encoded emails of the corresponding authors.

    Only the contributions flagged as corresponding and with a non-empty email are kept.
    The underscores escaped by unicode_to_latex are restored.
    """
    return [
        unicode_to_latex(contribution.email).replace(r"\_", r"_")
        for contribution in author_contributions
        if contribution.corresponding and contribution.email
    ]
def get_tex_authors(author_contributions):
    """
    Return the tex lines that describe the authors of an article.

    For each contribution:
      - one author line with first name, last name and the optional
        orcid / equal contribution / deceased markers
      - one address line per address of the contribution
      - one email line if the author is a corresponding author with an email
      - an empty line

    Names, addresses and emails are encoded with unicode_to_latex.
    """
    lines = []

    for contribution in author_contributions:
        # \author{\firstname{Antoine} \lastname{Lavoisier}}
        # \address{Rue sans aplomb, Paris, France}
        # \email[A. Lavoisier]{a-lavois@lead-free-univ.edu}
        first_name = unicode_to_latex(contribution.first_name)
        last_name = unicode_to_latex(contribution.last_name)

        author_parts = [f"\\author{{\\firstname{{{first_name}}} \\lastname{{{last_name}}}"]
        if contribution.orcid:
            author_parts.append(f"\\CDRorcid{{{contribution.orcid}}}")
        if contribution.equal_contrib:
            author_parts.append("\\IsEqualContrib")
        if contribution.deceased_before_publication:
            author_parts.append("\\dead")
        author_parts.append("}\n")
        lines.append("".join(author_parts))

        lines.extend(
            f"\\address{{{unicode_to_latex(contribaddress.address)}}}\n"
            for contribaddress in contribution.contribaddress_set.all()
        )

        # Truthiness (as in get_tex_corresponding_emails) instead of len(): an email
        # that is None must be ignored, not raise a TypeError
        if contribution.corresponding and contribution.email:
            email = unicode_to_latex(contribution.email)
            lines.append(f"\\email{{{email}}}\n")

        lines.append("\n")

    return lines
def create_tex_for_pcj(article):
    """
    Build the tex file (list of lines) used to create the front page of a PCJ article.

    The file declares the metadata of the article (issue, article number, DOIs, PCI section,
    article type, conference, corresponding emails, title, authors, first abstract,
    publication date) then includes the PDF of the article (article_<pid>.pdf).
    """
    pci = article.get_pci_section()

    extid = model_helpers.get_extid(article, "rdoi")
    rdoi = extid.id_value if extid is not None else ""

    lines = [
        "\\documentclass[PCJ,Unicode,screen,Recup]{cedram}\n",
        "\\usepackage{pax}\n",
        "\\usepackage{mathrsfs}\n\n",
        "\\issueinfo{"
        + article.my_container.volume
        + "}{}{}{"
        + article.my_container.year
        + "}\n",
        f"\\renewcommand*{{\\thearticle}}{{{article.article_number}}}\n",
        f"\\DOI{{{article.doi}}}\n",
        f"\\RDOI{{{rdoi}}}\n",
        f"\\setPCI{{{pci}}}\n",
        # Ends with a newline like every other line, so that the next command
        # is not glued to this one
        f"\\CDRsetmeta{{articletype}}{{{article.atype}}}\n",
    ]

    conf = article.get_conference()
    if len(conf) > 0:
        lines.append(f"\\setPCIconf{{{conf}}}\n")

    author_contributions = article.get_author_contributions()

    for email in get_tex_corresponding_emails(author_contributions):
        lines.append(f"\\PCIcorresp{{{email}}}\n")

    lines.append("\n")

    # \title[Sample for the template]{Sample for the template, with quite a very long title}
    # The HTML tags are replaced by placeholders before the LaTeX encoding,
    # then the placeholders are replaced by the LaTeX commands
    title_markup = (
        ("i", "\\protect\\emph{"),
        ("sup", "\\protect\\textsuperscript{"),
        ("sub", "\\protect\\textsubscript{"),
    )
    title = article.title_tex
    for tag, _ in title_markup:
        title = title.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")
    title = unicode_to_latex(title)
    for tag, command in title_markup:
        title = title.replace(f"|||{tag}|||", command).replace(f"|||/{tag}|||", "}")
    lines.append(f"\\title{{{title}}}\n")
    lines.append("\n")
    lines.extend(get_tex_authors(author_contributions))

    # No keywords for PCJ

    abstracts = article.get_abstracts()
    if len(abstracts) > 0:
        abstract = abstracts.first()
        value = get_tex_from_xml(abstract.value_xml, "abstract", for_tex_file=True)

        lines.append("\\begin{abstract}\n")
        lines.append(value + "\n")
        lines.append("\\end{abstract}\n")

    # Placeholder date when the article is not published yet
    date_ = article.date_published.strftime("%Y-%m-%d") if article.date_published else "AAAA-MM-DD"
    keyword = get_tex_keyword_date_published("PCJ", article)
    lines.append(f"{keyword}{{{date_}}}\n")

    lines.append("\\begin{document}\n")
    lines.append("\\maketitle\n")
    article_pdf = f"article_{article.pid}.pdf"
    lines.append(f"\\PCIincludepdf{{{article_pdf}}}\n")

    lines.append("\\end{document}\n")

    return lines
def compile_tex(lines, article, update=False):
    """
    1) Create a tex file from the list of lines
    2) Upload the file to mathdoc-tex (+ the pdf for PCJ)
    3) Compile the file
    4) Replace the pdf in /mersenne_test_data
    5) linearize the pdf

    Nothing is done (and None is returned) outside of ptf-tools.
    With update=False, the article folder is created on mathdoc-tex and the PDF of the
    article is uploaded before the compilation.
    Returns the path of the PDF in MERSENNE_TEST_DATA_FOLDER.

    TODO: merge ptf_tools/views create_frontpage (not done while PCJ is unstable to avoid compilation bugs in prod)
    """
    # Only allowed on ptf-tools
    if settings.SITE_NAME != "ptf_tools":
        return

    user = settings.MERSENNE_TEX_USER
    host = f"{user}@mathdoc-tex"
    issue = article.my_container
    colid = issue.my_collection.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue.pid)

    if colid == "PCJ":
        article_tex_name = article.pid
        article_pdf = f"article_{article.pid}.pdf"
    else:
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_pdf = ""
    article_path = os.path.join(issue_path, article_tex_name)

    if not update:
        # Create the article folder
        utils.execute_cmd(f"ssh {host} mkdir -p {article_path}")

        # copy the pdf to mersenne-tex
        relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
        folder = os.path.join(settings.RESOURCES_ROOT, relative_folder)
        pdf_file_name = os.path.join(folder, article.pid + ".pdf")
        utils.execute_cmd(f"scp {pdf_file_name} {host}:{article_path}/{article_pdf}")

    # Write the tex in a local temporary file, then copy it to mersenne-tex
    article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
    fpath = write_tex_file("", lines, create_temp_file=True)
    utils.execute_cmd(f"scp {fpath} {host}:{article_tex_file_name}")

    # recompile article: execute the script on mersenne-tex
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(
        f"ssh {host} 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name} {colid} {article_pdf}"
    )

    # replace pdf
    cedram_pdf_location = os.path.join(article_path, article_tex_name + ".pdf")
    relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
    to_path = os.path.join(
        settings.MERSENNE_TEST_DATA_FOLDER, relative_folder, article.pid + ".pdf"
    )
    if settings.MERSENNE_CREATE_FRONTPAGE:
        utils.linearize_pdf(cedram_pdf_location, to_path)

    return to_path
def add_outline(reader, writer, outlines, parent=None):
    """
    Copy an outline (table of contents of a PDF) from a pypdf reader to a pypdf writer.

    outlines is a list whose items are either outline entries or nested lists:
    a nested list holds the children of the entry that precedes it.
    parent is the outline item of the writer under which the entries are added
    (None for the top level).
    """
    child_parent = parent
    for item in outlines:
        if isinstance(item, list):
            # Children of the last entry added at this level
            add_outline(reader, writer, item, child_parent)
            continue

        title = item["/Title"]
        page_num = reader.get_destination_page_number(item)

        if item["/Type"] == "/XYZ":
            # Keep the position of the destination inside the page
            fit = pypdf.generic.Fit("/XYZ", (item["/Left"], item["/Top"], 1))
            child_parent = writer.add_outline_item(
                title, page_num, parent, None, False, False, fit
            )
        else:
            child_parent = writer.add_outline_item(title, page_num, parent, None, False, False)
def test():
    """
    Developer helper: merge two local PDF files the way replace_front_page does, then exit.

    The first page of test_FP.pdf is followed by all the pages but the first of
    test_content.pdf; the outline and the named destinations of the content PDF are copied,
    the result is written to test_merged.pdf and add_metadata is called with the first
    article of the database.

    The paths are hard-coded and the function ends the process with exit().
    """
    local_fp_pdf = "/home/touvierj/Bureau/test_FP.pdf"
    local_content_pdf = "/home/touvierj/Bureau/test_content.pdf"
    merged_pdf = "/home/touvierj/Bureau/test_merged.pdf"

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: first page of the front page PDF
    if len(pdf_reader_fp.pages) > 0:
        pdf_writer.add_page(pdf_reader_fp.pages[0])

    # Content: every page but the first one
    for index, current_page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(current_page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Add metadata to the PDF, including EXIF data
    add_metadata(models.Article.objects.first(), local_content_pdf, merged_pdf)

    # The page label experiment that used to follow this call was unreachable
    # and has been removed.
    exit()
def add_metadata(article, in_pdf, out_pdf):
    """
    Write the metadata of an article (XMP/EXIF tags) in a PDF with exiftool.

    The tags of in_pdf are copied to out_pdf (-tagsFromFile), then the title and author
    found in the document information of in_pdf and the metadata of the article
    (publisher, DOI, keywords, volume, number, ISSN...) are set.
    When in_pdf and out_pdf are the same file, the file is overwritten in place.

    exiftool is called with a list of arguments (no shell), so that quotes or any other
    shell special character in a title, a keyword... cannot break or alter the command.

    Returns the output of exiftool (bytes).
    Raises subprocess.CalledProcessError if exiftool fails.
    """
    reader = pypdf.PdfReader(in_pdf, strict=False)

    # NOTE(review): pypdf documents the metadata property as optional;
    # fall back to an empty mapping when it is None
    metadata = reader.metadata or {}

    args = ["exiftool", "-tagsFromFile", in_pdf]
    if in_pdf == out_pdf:
        args.append("-overwrite_original_in_place")

    container = article.my_container
    collection = article.get_collection()

    _, kwds, _ = article.get_kwds_by_type()
    keywords = ", ".join(str(kwd.value) for kwd in kwds)

    lang = ""
    if article.lang == "fr":
        lang = "fr-FR"
    elif article.lang == "en":
        lang = "en-GB"

    for tag in ("Title", "Author"):
        key = f"/{tag}"
        if key in metadata:
            args.append(f"-{tag}={metadata[key]}")

    args.append("-Creator=Centre Mersenne")
    args.append("-Subject=")
    if lang:
        # NOTE(review): "-xmp-dc-Language" is kept as it was; the other tags use the
        # "group:tag" form (xmp-dc:Language?) - confirm against the exiftool tag names
        args.append(f"-xmp-dc-Language={lang}")
    args.append(f"-xmp-dc:publisher={container.my_publisher.pub_name}")
    args.append(f"-xmp-prism:DOI={article.doi}")
    args.append(f"-Keywords={keywords}")
    args.append(f"-xmp-xmp:Keywords={keywords}")
    args.append(f"-xmp-pdf:Keywords={keywords}")
    args.append("-xmp-pdf:Copyright=© The author(s)")

    optional_tags = (
        ("xmp-prism:Volume", container.volume),
        ("xmp-prism:Number", container.number),
        ("xmp-prism:ISSN", collection.issn),
        ("xmp-prism:EISSN", collection.e_issn),
        ("xmp-prism:IssueName", container.title_tex),
    )
    for tag, value in optional_tags:
        if value:
            args.append(f"-{tag}={value}")

    args.append(out_pdf)

    return subprocess.check_output(args)
def replace_front_page(
    article, article_tex_name, fp_pdf_file_name, content_pdf_file_name, final_pdf_file_name
):
    """
    Create a PDF made of a new front page followed by the existing content of an article.

    At this point the PDF has been recompiled, possibly with a new template.
    The first page of fp_pdf_file_name (on mathdoc-tex) is merged with all the pages but
    the first of content_pdf_file_name (local file). The outline and the named destinations
    of the content PDF are kept, the metadata are added, the page labels are set from
    article.fpage (arabic or roman) and the result is copied to final_pdf_file_name
    on mathdoc-tex.

    The work is done on local copies in settings.LOG_DIR/tmp/.
    """
    user = settings.MERSENNE_TEX_USER
    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")

    # Copy the PDF files locally (pypdf is installed in ptf-tools)
    local_fp_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_FP")
    utils.execute_cmd(f"scp {user}@mathdoc-tex:{fp_pdf_file_name} {local_fp_pdf}")

    local_content_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_content")
    utils.execute_cmd(f"cp {content_pdf_file_name} {local_content_pdf}")

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: first page of the recompiled PDF
    if len(pdf_reader_fp.pages) > 0:
        pdf_writer.add_page(pdf_reader_fp.pages[0])

    # Content: every page but the first one
    for index, current_page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(current_page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    merged_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_merged")
    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Compiled PDF are sometimes buggy (wrong xref table). Use pdftk to fix the file.
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(f"{ptf_tools_bin}/update_pdf.sh {local_content_pdf} {merged_pdf}")

    # Add metadata to the PDF, including EXIF data
    add_metadata(article, local_content_pdf, merged_pdf)

    # Without a first page there is nothing to relabel: the merged PDF is the final one.
    # (Uploading local_pdf in that case would send a missing or stale file.)
    pdf_to_upload = merged_pdf

    # pypdf creates a PDF that starts on page 1, fix it
    if article.fpage:
        try:
            first_page = int(article.fpage)
            label_style = "/D"
        except ValueError:
            first_page = xml_utils.roman_to_int(article.fpage)
            label_style = "/r"

        reader = pypdf.PdfReader(merged_pdf)
        writer = pypdf.PdfWriter()
        page_count = 0
        for page in reader.pages:
            page_count += 1
            writer.add_page(page)

        writer.set_page_label(
            page_index_from=0,
            page_index_to=page_count - 1,
            start=first_page,
            style=label_style,
        )

        local_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf")
        writer.write(local_pdf)
        writer.close()
        pdf_to_upload = local_pdf

    # copy to mersenne-tex
    utils.execute_cmd(f"scp {pdf_to_upload} {user}@mathdoc-tex:{final_pdf_file_name}")
def compile_article(
    article,
    colid,
    issue_id,
    article_path,
    article_tex_name,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Recompile an article on mathdoc-tex with its dates and put the PDF in the test data folder.

    Regular compilation: the tex is updated with the dates of the article, compiled,
    then protected again with the 'published' option.
    replace_frontpage_only: the compilation is done on copies (<name>_FP.*) and only the
    first page of the new PDF is kept; it is merged with the content of the production PDF.
    replace_frontpage_only + skip_compilation: nothing is compiled, the existing PDF is used
    as the source of the front page.
    lang: language of a translation ("" for the original article); the files of a
    translation are named <name>-<lang> and have no cfg file.

    issue_id is not used.
    """
    user = settings.MERSENNE_TEX_USER
    host = f"{user}@mathdoc-tex"

    def remote_copy(source, destination):
        # cp executed on mathdoc-tex
        utils.execute_cmd(f"ssh {host} cp {source} {destination}")

    def upload_tex(tex_lines, remote_tex_file_name):
        # Write the lines in a local temporary file (kept on disk), then copy it to mersenne-tex
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", prefix=prefix, delete=False
        ) as tmp_file:
            tmp_file.write("".join(tex_lines))
        utils.execute_cmd(f"scp {tmp_file.name} {host}:{remote_tex_file_name}")

    if lang != "":
        article_tex_name += "-" + lang

    article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
    # Regular compilation: compiled_pdf and final_pdf are the same
    # recompilation of the front page: compiled_pdf is the entire pdf with the new front page
    #                                  final_pdf is the pdf after the merge (new front page; old content)
    compiled_pdf_file_name = final_pdf_file_name = os.path.join(
        article_path, article_tex_name + ".pdf"
    )
    sav_pdf_file_name = compiled_pdf_file_name + "_SAV"

    # Save the pdf file
    remote_copy(compiled_pdf_file_name, sav_pdf_file_name)

    # Save the cfg file (no cfg for translations)
    if lang == "":
        remote_copy(article_cfg_file_name, article_cfg_file_name + "_SAV")

    # Folder of the temporary files.
    # Careful: on ptf-tools, apache does not seem to be allowed to write in /tmp
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    if replace_frontpage_only and skip_compilation:
        # We want to update the front page without compiling the tex:
        # We copy the original <article_tex_name>.PDF to <article_tex_name>_FP.PDF
        article_tex_name2 = article_tex_name + "_FP"
        new_compiled_pdf_file_name = os.path.join(article_path, article_tex_name2 + ".pdf")
        remote_copy(compiled_pdf_file_name, new_compiled_pdf_file_name)
        compiled_pdf_file_name = new_compiled_pdf_file_name

    elif replace_frontpage_only:
        # Copy CFG/TEX/PDF to a new name. pdflatex will generate new files, thus preserving existing files
        article_tex_name2 = article_tex_name + "_FP"

        utils.execute_cmd(f"ssh {host} rm -f {os.path.join(article_path, article_tex_name2)}.*")

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
        article_tex_file_name2 = os.path.join(article_path, article_tex_name2 + ".tex")
        remote_copy(article_tex_file_name, article_tex_file_name2)

        article_cfg_file_name2 = os.path.join(article_path, article_tex_name2 + ".cfg")
        remote_copy(article_cfg_file_name, article_cfg_file_name2 + "_SAV")

        article_cdrdoidates_file_name = os.path.join(
            article_path, article_tex_name + ".cdrdoidates"
        )
        if os.path.isfile(article_cdrdoidates_file_name):
            article_cdrdoidates_file_name2 = os.path.join(
                article_path, article_tex_name2 + ".cdrdoidates"
            )
            remote_copy(article_cdrdoidates_file_name, article_cdrdoidates_file_name2)

        # From now on, work on the _FP copies
        article_tex_name = article_tex_name2
        article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
        compiled_pdf_file_name = os.path.join(article_path, article_tex_name + ".pdf")
        final_pdf_file_name = compiled_pdf_file_name + ".new"

    if not skip_compilation:
        # Remove \ItIsPublished from the cfg file
        if lang == "":
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\ItIsPublished//' {article_cfg_file_name}_SAV > {article_cfg_file_name}.1"'''
            utils.execute_cmd(cmd)
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\gdef \\\\\\\\CDRpublished {{true}}//' {article_cfg_file_name}.1 > {article_cfg_file_name}"'''
            utils.execute_cmd(cmd)

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")

        # Save the tex file
        remote_copy(article_tex_file_name, article_tex_file_name + "_SAV")

        lines = read_tex_file(article_tex_file_name)
        new_lines, bib_name = replace_dates_in_tex(
            lines, article, colid, replace_frontpage_only, lang=lang
        )

        if bib_name and replace_frontpage_only:
            convert_file_to_utf8(article_path, bib_name + ".bib", bib_name + "_FP.bib")

        # Tex with the dates, copied to mersenne-tex
        upload_tex(new_lines, article_tex_file_name)

        # recompile article: execute the script on mersenne-tex
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        utils.execute_cmd(
            f"ssh {host} 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name}"
        )

        # Protect the tex file with the 'published' option
        new_lines = protect_tex(new_lines)

        # Protected tex, copied to mersenne-tex
        prefix = os.path.join(settings.LOG_DIR, "tmp/")
        upload_tex(new_lines, article_tex_file_name)

    if replace_frontpage_only:
        # At the point the PDF has been recompiled, possibly with a new template
        # Use the 1st page of the new PDF with the other pages of the production PDF
        datastream = article.datastream_set.filter(mimetype="application/pdf").get()
        content_pdf_file_name = os.path.join(
            settings.MERSENNE_PROD_DATA_FOLDER, datastream.location
        )

        replace_front_page(
            article,
            article_tex_name,
            compiled_pdf_file_name,
            content_pdf_file_name,
            final_pdf_file_name,
        )

    # Copy PDF to MERSENNE_TEST_DATA_FOLDER
    datastream = article.datastream_set.filter(mimetype="application/pdf").get()
    to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, datastream.location)
    # remove destination if exists to test if final pdf is really created
    if os.path.exists(to_path):
        os.remove(to_path)
    utils.linearize_pdf(final_pdf_file_name, to_path)
def create_frontpage(
    colid,
    container,
    updated_articles,
    test=True,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Create the front page of articles by recompiling them on mersenne-tex.

    Flow:
      - get the folder of the article sources (cedram_dev/production/...)
      - add the publication date in the source TeX
      - remote execution of the compilation
      - replace the pdf of the article on mersenne_test_data

    PCJ articles are compiled from a generated tex (create_tex_for_pcj + compile_tex).
    Nothing is done for the CRAS issues before 2020 (unless lang is set).
    For the other collections, an article is compiled only if test is False and
    the article has an online-first date or a publication date.

    Raises an Exception if an article has no ojs-id.
    """
    # TODO refactor the code and only use compile_tex for all collections
    if colid == "PCJ":
        for article in updated_articles:
            compile_tex(create_tex_for_pcj(article), article, update=True)
        return

    try:
        year = int(container.year)
    except ValueError:
        year = 0

    is_cras = colid in ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")
    if is_cras and year < 2020 and lang == "":
        # No front page for Elsevier CRAS
        return

    issue_id = container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue_id)

    # No rollback is needed in case of error: nothing is modified in the database
    # (a pdf_SAV could be put back in place if necessary)
    for article in updated_articles:
        # article path
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

        # Intended flow:
        # - in test:
        #   date_pre_publish has been updated but not the other dates.
        #   A temporary date like XXXX-XX-XX would only be created for online first
        #   (otherwise, when the online first goes to production, the pdftimestamp file
        #   with XXXX-XX-XX would make this date appear)
        # - if article.my_container.with_online_first and the article has no date_online_first:
        #   XXXX-XX-XX is used for online first
        #   (an article with an online-first date has normally already been recompiled)
        # - in production, the dates of the article are used:
        #   article.date_online_first if container.with_online_first,
        #   article.date_published if it exists
        has_date = article.date_online_first or article.date_published
        if test or not has_date:
            continue

        compile_article(
            article,
            colid,
            issue_id,
            article_path,
            article_tex_name,
            replace_frontpage_only,
            skip_compilation,
            lang,
        )
def create_translated_pdf(
    article, xml_content, lang, pdf_file_name, html_file_name, skip_compilation=False
):
    """
    Create the PDF of the translation of an article on mathdoc-tex and fetch it.

    Unless skip_compilation is set, the XML (xml_content) and the HTML (html_file_name)
    of the translation are copied to the folder of the article on mathdoc-tex and
    translate_article.sh is executed there.
    The resulting <ojs-id>-<lang>.pdf is then copied to pdf_file_name.

    Raises an Exception if the article has no ojs-id.
    """
    user = settings.MERSENNE_TEX_USER
    host = f"{user}@mathdoc-tex"

    issue_path = resolver.get_cedram_issue_tex_folder(
        article.get_top_collection().pid, article.my_container.pid
    )
    article_tex_name = article.get_ojs_id()
    if not article_tex_name:
        raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
    article_path = os.path.join(issue_path, article_tex_name)

    xml_base_name = article_tex_name + ".xml"
    local_xml = os.path.join(settings.LOG_DIR, "tmp", xml_base_name)
    remote_xml = os.path.join(article_path, xml_base_name)

    if not skip_compilation:
        # Create the XML file locally
        with open(local_xml, "w", encoding="utf-8") as xml_file:
            xml_file.write(xml_content)

        # Copy XML file to mersenne-tex
        utils.execute_cmd(f"scp {local_xml} {host}:{remote_xml}")

        # Copy HTML file to mersenne-tex
        remote_html_base_name = f"trad-{lang}.html"
        remote_html = os.path.join(article_path, remote_html_base_name)
        utils.execute_cmd(f"scp {html_file_name} {host}:{remote_html}")

        # Create the PDF: execute the script on mersenne-tex
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        utils.execute_cmd(
            f"ssh {host} 'bash -s' -- < {ptf_tools_bin}/translate_article.sh {article_path} {xml_base_name} {remote_html_base_name} {lang}"
        )

    # pdf-traduction should have created remote.pdf
    # Copy the PDF file
    remote_pdf = os.path.join(article_path, f"{article_tex_name}-{lang}.pdf")
    utils.execute_cmd(f"scp {host}:{remote_pdf} {pdf_file_name}")