Coverage for src/ptf/cmds/ptf_cmds/base_ptf_cmds.py: 58%
865 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1import copy
2import datetime
3import json
4import os
5import subprocess
6import sys
8import lxml.etree as etree
9from PIL import Image
11from django.conf import settings
12from django.core.exceptions import ObjectDoesNotExist
13from django.template.loader import render_to_string
15from ptf import exceptions
16from ptf import model_helpers
17from ptf import utils
18from ptf.cmds.database_cmds import add_contributors
19from ptf.cmds.database_cmds import addArticleDatabaseCmd
20from ptf.cmds.database_cmds import addBibItemDatabaseCmd
21from ptf.cmds.database_cmds import addBibItemIdDatabaseCmd
22from ptf.cmds.database_cmds import addCollectionDatabaseCmd
23from ptf.cmds.database_cmds import addContainerDatabaseCmd
24from ptf.cmds.database_cmds import addDataStreamDatabaseCmd
25from ptf.cmds.database_cmds import addExtIdDatabaseCmd
26from ptf.cmds.database_cmds import addExtLinkDatabaseCmd
27from ptf.cmds.database_cmds import addFrontMatterDatabaseCmd
28from ptf.cmds.database_cmds import addProviderDatabaseCmd
29from ptf.cmds.database_cmds import addPublisherDatabaseCmd
30from ptf.cmds.database_cmds import addRelatedObjectDatabaseCmd
31from ptf.cmds.database_cmds import addRelationshipDatabaseCmd
32from ptf.cmds.database_cmds import addResourceCountDatabaseCmd
33from ptf.cmds.database_cmds import addResourceInSpecialIssueDatabaseCmd
34from ptf.cmds.database_cmds import addSiteDatabaseCmd
35from ptf.cmds.database_cmds import addSupplementaryMaterialDatabaseCmd
36from ptf.cmds.database_cmds import addXmlBaseDatabaseCmd
37from ptf.cmds.database_cmds import baseCmd
38from ptf.cmds.database_cmds import publishArticleDatabaseCmd
39from ptf.cmds.database_cmds import publishContainerDatabaseCmd
40from ptf.cmds.database_cmds import updateCollectionDatabaseCmd
41from ptf.cmds.database_cmds import updateExtLinkDatabaseCmd
42from ptf.cmds.database_cmds import updateResourceIdDatabaseCmd
43from ptf.cmds.solr_cmds import addArticleSolrCmd
44from ptf.cmds.solr_cmds import addBookPartSolrCmd
45from ptf.cmds.solr_cmds import addContainerSolrCmd
46from ptf.cmds.solr_cmds import solrFactory
47from ptf.cmds.solr_cmds import updateResourceSolrCmd
48from ptf.cmds.xml import xml_utils
49from ptf.display import resolver
50from ptf.model_data import PublisherData
51from ptf.models import ExtLink
52from ptf.models import Person
53from ptf.models import Relationship
def myconverter(o):
    """Return the string form of a ``datetime.datetime``, else ``None``.

    NOTE(review): presumably meant as a ``json.dumps(default=...)`` hook; no
    caller is visible in this part of the file - confirm.

    Args:
        o: any object.

    Returns:
        ``str(o)`` when ``o`` is a ``datetime.datetime`` instance, otherwise
        ``None`` (the original fell through without a return statement; this
        is kept and made explicit).
    """
    if isinstance(o, datetime.datetime):
        return str(o)
    return None
def do_solr_commit():
    """Delegate to ``solrFactory.do_solr_commit()``."""
    solrFactory.do_solr_commit()
def do_solr_rollback():
    """Delegate to ``solrFactory.do_solr_rollback()``."""
    solrFactory.do_solr_rollback()
#####################################################################
#
# addPtfCmd: base class of PtfCmds
#
# PtfCmds may have a cmd and a sub-cmd.
# The cmd is executed, and the id of the returned object is passed
# to the sub-cmd before its execution.
#
# This makes it possible to store an object in Django, get the Django object id,
# then store the corresponding document in Solr.
#
######################################################################
class addPtfCmd(baseCmd):
    """Base class of the PtfCmds.

    A PtfCmd may hold a ``cmd`` and a ``sub_cmd``. ``cmd`` is executed first;
    the object it returns (together with its ``id`` and ``pid``) is handed to
    ``sub_cmd`` before ``sub_cmd`` is executed.
    """

    def __init__(self, params=None):
        # Mirror "solr_commit" under the "commit" key.
        # NOTE: the caller's dict is updated in place; the subclasses in this
        # file pass that same dict on to the commands they create.
        if params is not None and "solr_commit" in params:
            params["commit"] = params["solr_commit"]

        super().__init__(params)
        self.required_delete_params.append("object_to_be_deleted")

        self.cmd: baseCmd | None = None
        self.sub_cmd = None

    def internal_do(self):
        """Run ``cmd`` then ``sub_cmd`` and return the resulting object."""
        obj = super().internal_do()

        if self.cmd:
            obj = self.cmd.do()

        if self.sub_cmd:
            # Hand the freshly created object over to the sub-command.
            self.sub_cmd.db_obj = obj
            self.sub_cmd.id = obj.id
            self.sub_cmd.pid = obj.pid

            self.sub_cmd.do()

        # Remember the object in case this command is undone later.
        self.set_object_to_be_deleted(obj)
        return obj

    def set_object_to_be_deleted(self, obj):
        """Record ``obj`` as the undo target on this command and on ``cmd``.

        Does nothing when ``obj`` is ``None``. ``cmd`` is only updated when one
        is set, consistent with the guards in ``internal_do`` and
        ``internal_undo``.
        """
        if obj is not None:
            self.object_to_be_deleted = obj
            if self.cmd:
                self.cmd.object_to_be_deleted = obj

    def internal_undo(self):
        """Undo ``cmd`` then ``sub_cmd``; return the id produced by the undo."""
        undone_id = super().internal_undo()

        if self.cmd:
            undone_id = self.cmd.undo()

        if self.sub_cmd:
            self.sub_cmd.id = undone_id
            self.sub_cmd.undo()

        return undone_id
131#####################################################################
132#
133# addSitePtfCmd: adds/remove a PtfSite
134# params: 'site_name', 'site_domain'
135#
136# Exception raised:
137# - ValueError if the init params are empty
138# - exceptions.ResourceExists during do if the site already exists
139# - exceptions.ResourceDoesNotExist during undo if the site does not exist
140# - RuntimeError during undo if resources are still published
141#
142######################################################################
class addSitePtfCmd(addPtfCmd):
    """Add or remove a PtfSite by delegating to ``addSiteDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addSiteDatabaseCmd(params)
150#####################################################################
151#
152# addProviderPtfCmd: adds/remove a Provider
153# params: 'name', 'pid_type', 'sid_type'
154#
155# Exception raised:
156# - ValueError if the init params are empty
157# - exceptions.ResourceExists during do if the provider already exists
158# - exceptions.ResourceDoesNotExist during undo if the provider does not exist
159#
160######################################################################
class addProviderPtfCmd(addPtfCmd):
    """Add or remove a Provider by delegating to ``addProviderDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addProviderDatabaseCmd(params)
168#####################################################################
169#
170# addXmlBasePtfCmd: adds/remove an XmlBase
171# XmlBase is the root URL of an ExtLink (ex: http://archive.numdam.org/article)
172# params: 'base'
173#
174# Exception raised:
175# - ValueError if the init params are empty
176# - exceptions.ResourceExists during do if the XmlBase already exists
177# - exceptions.ResourceDoesNotExist during undo if the XmlBase does not exist
178# - RuntimeError during undo if related extlinks or objects still exist
179#
180######################################################################
class addXmlBasePtfCmd(addPtfCmd):
    """Add or remove an XmlBase by delegating to ``addXmlBaseDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addXmlBaseDatabaseCmd(params)
188#####################################################################
189#
190# addExtLinkPtfCmd: adds/remove an ExtLink
191# params: 'rel': 'website' or 'small_icon'
192# 'mimetype', 'location', 'metadata', 'seq'
193#
194# Needs a Resource object (required) and a XmlBase object (option)
195#
196# Exception raised:
197# - ValueError if the init params are empty
198# - exceptions.ResourceExists during do if the ExtLink already exists
199# - exceptions.ResourceDoesNotExist during undo if the ExtLink does not exist
200# - RuntimeError during undo if resources are still published
201#
202######################################################################
class addExtLinkPtfCmd(addPtfCmd):
    """Add or remove an ExtLink by delegating to ``addExtLinkDatabaseCmd``.

    When ``location`` starts with ``file:`` and a ``to_folder`` is set, the
    referenced file is copied (or converted to JPEG for ``.tif``/``.tiff``)
    into the resource folder before the database object is stored.
    """

    cmd: addExtLinkDatabaseCmd

    def __init__(self, params=None):
        super().__init__(params)

        self.cmd = addExtLinkDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)

    def set_base(self, base):
        """Forward the XmlBase to the database command."""
        self.cmd.set_base(base)

    def pre_do(self):
        """Copy a ``file:`` source into ``to_folder`` and rewrite ``location``.

        This happens before the command runs so that the stored object carries
        the final ``location`` value.
        """
        super().pre_do()

        if self.to_folder and self.location.startswith("file:"):
            # Import with a full file path (ex: Elsevier CRAS):
            # 1. copy the file
            # 2. update the location field to use the PTF folder tree
            from_path = self.location[5:]

            convert_image = False
            extension = os.path.splitext(self.location)[1]
            if extension in (".tif", ".tiff"):
                # TIFF sources are stored as JPEG.
                convert_image = True
                extension = ".jpg"

            resource = self.cmd.resource
            relative_path = resource.pid + extension
            new_location = os.path.join(resource.get_relative_folder(), relative_path)
            to_path = os.path.join(self.to_folder, new_location)

            os.makedirs(os.path.dirname(to_path), exist_ok=True)

            if convert_image:
                # Context manager so the source file handle is released.
                with Image.open(from_path) as im:
                    im.thumbnail(im.size)
                    im.save(to_path, "JPEG", quality=100)
            else:
                resolver.copy_file(from_path, to_path)

            self.location = new_location
            self.cmd.location = new_location
252#####################################################################
253#
254# addExtIdPtfCmd: adds/remove an ExtId
255# params: 'id_type', 'id_value'
256#
257# Needs a Resource object
258#
259# Exception raised:
260# - ValueError if the init params are empty
261# - exceptions.ResourceExists during do if the ExtId already exists
262# - exceptions.ResourceDoesNotExist during undo if the ExtId does not exist
263# - RuntimeError during undo if resources are still published
264#
265######################################################################
class addExtIdPtfCmd(addPtfCmd):
    """Add or remove an ExtId by delegating to ``addExtIdDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addExtIdDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
276#####################################################################
277#
278# addRelatedObjectPtfCmd: adds/remove a RelatedObject
279# params: 'rel':
280# 'mimetype', 'location', 'metadata', 'seq'
281#
282# Needs a Resource object and a XmlBase object
283#
284# Exception raised:
285# - ValueError if the init params are empty
286# - exceptions.ResourceExists during do if the RelatedObject already exists
287# - exceptions.ResourceDoesNotExist during undo if the RelatedObject does not exist
288# - RuntimeError during undo if resources are still published
289#
290######################################################################
class addRelatedObjectPtfCmd(addPtfCmd):
    """Add or remove a RelatedObject by delegating to ``addRelatedObjectDatabaseCmd``.

    Besides the database work, the command copies the underlying file into
    ``to_folder`` in ``pre_do`` and removes it again in ``pre_undo``.
    """

    def __init__(self, params=None):
        super().__init__(params)
        self.do_linearize = True

        # The Resource is needed to construct the complete path.
        self.required_delete_params.append("resource")

        self.cmd = addRelatedObjectDatabaseCmd(params)

    def set_resource(self, resource):
        """Keep the resource locally and forward it to the database command."""
        self.resource = resource
        self.cmd.set_resource(resource)

    def set_base(self, base):
        """Forward the XmlBase to the database command."""
        self.cmd.set_base(base)

    def pre_do(self):
        """Copy the source file into ``to_folder`` and rewrite ``location``.

        Two cases trigger the copy:

        A. Import of a Cedrics XML: ``location`` is relative to ``from_folder``
           (unlike a Cedrics import converted to JATS, where the fields are
           more or less relative to ``to_folder``).
        B. Import with a full file path, ``file:...`` (ex: Elsevier CRAS).
           RelatedObject is used for the (HTML) article images; issue cover
           images are ExtLinks (see addExtLinkPtfCmd).

        The copy happens before the command runs so that the stored object
        carries the final ``location`` value.
        """
        super().pre_do()

        full_path_pos = self.location.find("file:")
        if (
            self.from_folder and self.to_folder and self.from_folder == settings.CEDRAM_TEX_FOLDER
        ) or (self.to_folder and full_path_pos != -1):
            location = self.location
            if full_path_pos > -1:
                # Everything after "file:" is the source path.
                from_path = location[full_path_pos + 5 :].replace(
                    "/ums_dev/numdam_dev", "/numdam_dev"
                )
            else:
                from_path = os.path.join(self.from_folder, location)

            convert_image = False
            extension = os.path.splitext(from_path)[1]
            resource = self.cmd.resource

            if full_path_pos > -1 and extension in xml_utils.get_elsevier_image_extensions():
                convert_image = True
                extension = ".jpg"

            if full_path_pos > 0:
                # Text preceding "file:" is used as the relative path.
                relative_path = location[0:full_path_pos]
            else:
                attach_pos = location.find("/Attach/")
                if attach_pos > 0:
                    # ".../Attach/x" becomes "attach/x"
                    relative_path = "a" + location[attach_pos + 2 :]
                elif extension == ".tex":
                    relative_path = os.path.join("src/tex", resource.pid + extension)
                elif extension == ".jpg":
                    basename = os.path.splitext(os.path.basename(from_path))[0]
                    relative_path = os.path.join("src/tex/figures", basename + extension)
                elif getattr(self, "supplementary_material", None):
                    # Supplements from Elsevier. They are declared with "file://"
                    # and need to be copied in attach/basename.
                    relative_path = "attach/" + os.path.basename(from_path)
                else:
                    relative_path = resource.pid + extension

            new_location = os.path.join(resource.get_relative_folder(), relative_path)
            to_path = os.path.join(self.to_folder, new_location)

            os.makedirs(os.path.dirname(to_path), exist_ok=True)

            do_copy = True
            # linearize_pdf directly creates the to_path (ptf-tools only);
            # there is no need to copy the file in that case.
            if extension.lower() == ".pdf" and self.do_linearize:
                do_copy = utils.linearize_pdf(from_path, to_path)
            if do_copy:
                if convert_image:
                    # Context manager so the source file handle is released.
                    with Image.open(from_path) as im:
                        im.thumbnail((1000, 1000), Image.Resampling.LANCZOS)
                        im.save(to_path, "JPEG", quality=90)
                else:
                    resolver.copy_file(from_path, to_path)

            self.location = new_location
            self.cmd.location = new_location

    def post_do(self, obj):
        """Copy binary files when importing from the mathdoc archive."""
        super().post_do(obj)
        # Case where the files come from mathdoc_archive (otherwise they are
        # copied in pre_do).
        if self.from_folder == settings.MATHDOC_ARCHIVE_FOLDER and self.to_folder:
            # Go through copy_binary_files, which also copies the ExtLinks
            # (icon, small-icon); otherwise those files are not copied.
            # Covers are not in the cedram XML, so they are not a concern here.
            resolver.copy_binary_files(obj.resource, self.from_folder, self.to_folder)

    def pre_undo(self):
        """Delete the copied file from ``to_folder`` before the undo."""
        super().pre_undo()
        if self.to_folder:
            path = os.path.join(self.to_folder, self.object_to_be_deleted.location)
            resolver.delete_file(path=path)
398#####################################################################
399#
400# addSupplementaryMaterialPtfCmd: adds/remove a Supplementary Material
401# params: 'rel':
402# 'mimetype', 'location', 'metadata', 'seq', 'caption'
403#
404# Needs a Resource object and a XmlBase object
405#
406# Exception raised:
407# - ValueError if the init params are empty
408# - exceptions.ResourceExists during do if the RelatedObject already exists
409# - exceptions.ResourceDoesNotExist during undo if the RelatedObject does not exist
410# - RuntimeError during undo if resources are still published
411#
412######################################################################
class addSupplementaryMaterialPtfCmd(addRelatedObjectPtfCmd):
    """RelatedObject command variant backed by ``addSupplementaryMaterialDatabaseCmd``.

    PDF linearization is switched off for this command.
    """

    def __init__(self, params=None):
        super().__init__(params)

        # Replace the database command installed by the parent constructor.
        self.cmd = addSupplementaryMaterialDatabaseCmd(params)
        self.do_linearize = False
420#####################################################################
421#
422# addDataStreamPtfCmd: adds/remove a RelatedObject
423# params: 'rel':
424# 'mimetype', 'location', 'metadata', 'seq'
425#
426# Needs a Resource object and a XmlBase object
427#
428# Exception raised:
429# - ValueError if the init params are empty
430# - exceptions.ResourceExists during do if the DataStream already exists
431# - exceptions.ResourceDoesNotExist during undo if the DataStream does not exist
432# - RuntimeError during undo if resources are still published
433#
434######################################################################
class addDataStreamPtfCmd(addRelatedObjectPtfCmd):
    """RelatedObject command variant backed by ``addDataStreamDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # Replace the database command installed by the parent constructor.
        self.cmd = addDataStreamDatabaseCmd(params)
441# #####################################################################
442# #
443# # addOrUpdateDataStreamPtfCmd: adds or Update a Datastream
444# # params: 'rel':
445# # 'mimetype', 'location', 'metadata', 'seq'
446# #
447# # if new location specify params: 'new_location'
448# # Needs a Resource object and a XmlBase object
449# #
450# # Exception raised:
451# # - ValueError if the init params are empty
452# # - RuntimeError during undo if resources are still published
453# #
454# ######################################################################
455# class addOrUpdateDataStreamPtfCmd(baseCmd):
456# def set_resource(self, resource):
457# self.resource = resource
458#
459# def internal_do(self):
460# super(addOrUpdateDataStreamPtfCmd, self).internal_do()
461# # copy new article pdf cedram_dev to mersenne_test_data
462# datastream_qs = DataStream.objects.filter(resource=self.resource,
463# base=self.base,
464# rel=self.rel,
465# location=self.location)
466#
467# cmd = addDataStreamPtfCmd({'rel':self.rel,
468# 'mimetype':self.mimetype,
469# 'location':self.location,
470# 'text':self.text,
471# 'seq':self.seq
472# })
473# cmd.set_base(self.base)
474# cmd.set_resource(self.resource)
475#
477# if datastream_qs.count() > 0:
478# cmd.set_object_to_be_deleted(datastream_qs.get())
479# cmd.undo()
480# cmd.set_params({'location': self.new_location})
481# cmd.do()
484#####################################################################
485#
486# addResourceCountPtfCmd: adds/remove a ResourceCount
487#
488# A ResourceCount is a generic count element.
489# Exemple: page count, table count, image count...
490#
491# params: 'name', 'value', 'seq'
492#
493# Needs a Resource object
494#
495# Exception raised:
496# - ValueError if the init params are empty
497# - exceptions.ResourceExists during do if the ResourceCount already exists
498# - exceptions.ResourceDoesNotExist during undo if the ResourceCount does not exist
499# - RuntimeError during undo if resources are still published
500#
501######################################################################
class addResourceCountPtfCmd(addPtfCmd):
    """Add or remove a ResourceCount by delegating to ``addResourceCountDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addResourceCountDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
512#####################################################################
513#
514# addBibItemPtfCmd: adds/remove a BibItem
515#
516# No verification is done to check if a BibItem already exists
517# Rationale: BibItems are only added in a loop within an article.
518# The check is actually the existence of the article.
519#
520# Exception raised:
521# - ValueError if the init params are empty
522# - exceptions.ResourceDoesNotExist during undo if the BibItem does not exist
523# - RuntimeError during undo if resources are still published
524#
525######################################################################
class addBibItemPtfCmd(addPtfCmd):
    """Add or remove a BibItem by delegating to ``addBibItemDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addBibItemDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
536#####################################################################
537#
538# addBibItemIdPtfCmd: adds/remove a BibItemId
539#
540# No verification is done to check if a BibItemId already exists
541# Rationale: BibItems are only added inside an article/book
542# The check is actually the existence of the resource.
543#
544# Exception raised:
545# - ValueError if the init params are empty
546# - exceptions.ResourceDoesNotExist during undo if the BibItemId does not exist
547# - RuntimeError during undo if resources are still published
548#
549######################################################################
class addBibItemIdPtfCmd(addPtfCmd):
    """Add or remove a BibItemId by delegating to ``addBibItemIdDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addBibItemIdDatabaseCmd(params)

    def set_bibitem(self, bibitem):
        """Forward the bibitem to the database command."""
        self.cmd.set_bibitem(bibitem)
560#####################################################################
561#
562# addFrontMatterPtfCmd: adds/remove a FrontMatter
563#
564# No verification is done to check if a FrontMatter already exists
565# Rationale: FrontMatters are only added inside a book
566# The check is actually the existence of the book.
567#
568# Exception raised:
569# - ValueError if the init params are empty
570# - exceptions.ResourceDoesNotExist during undo if the FrontMatter does not exist
571# - RuntimeError during undo if resources are still published
572#
573######################################################################
class addFrontMatterPtfCmd(addPtfCmd):
    """Add or remove a FrontMatter by delegating to ``addFrontMatterDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addFrontMatterDatabaseCmd(params)

    def set_resource(self, resource):
        """Forward the resource to the database command."""
        self.cmd.set_resource(resource)
#####################################################################
#
# addRelationshipPtfCmd: adds/removes a Relationship
#
# A Relationship relates 2 resources (ex: articles) with a relation. ex "follows", "followed-by"
#
# RelationNames are created with a fixture (see app/ptf/apps/ptf/fixtures/initial_data.json)
# Example { "left" : "follows", "right" : "followed-by" }
# A related-article of an article has 1 relation name (ex "follows" or "followed-by")
# You need to know if the relation was stored in the left or right attribute of a RelationName,
# so that you can create/search the Relationship with the correct object/subject.
# Ex: with A "follows" B, A is the subject and B the object because "follows" is a RelationName.left attribute
#     with A "followed-by" B, A is the object and B the subject because "followed-by" is a RelationName.right attribute
# A Relationship relates 2 resources with a RelationName
#
# Exception raised:
#    - ValueError if the init params are empty
#    - exceptions.ResourceExists during do if the Relationship already exists
#    - exceptions.ResourceDoesNotExist during undo if the Relationship does not exist
#    - RuntimeError during undo if resources are still published
#
######################################################################
class addRelationshipPtfCmd(addPtfCmd):
    """Add or remove a Relationship by delegating to ``addRelationshipDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addRelationshipDatabaseCmd(params)

    def set_subject_resource(self, resource):
        """Forward the subject resource to the database command."""
        self.cmd.set_subject_resource(resource)

    def set_object_resource(self, resource):
        """Forward the object resource to the database command."""
        self.cmd.set_object_resource(resource)

    def set_relationname(self, relationname):
        """Forward the relation name to the database command."""
        self.cmd.set_relationname(relationname)
622#####################################################################
623#
624# addPublisherPtfCmd: adds/remove a publisher
625# params: 'name', 'location'
626#
627# Exception raised:
628# - ValueError if the init params are empty
629# - exceptions.ResourceExists during do if the Publisher already exists
630# - exceptions.ResourceDoesNotExist during undo if the Publisher does not exist
631#
632######################################################################
class addPublisherPtfCmd(addPtfCmd):
    """Add or remove a publisher by delegating to ``addPublisherDatabaseCmd``.

    No Solr sub-command is attached.
    """

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addPublisherDatabaseCmd(params)
#####################################################################
#
# addResourcePtfCmd: adds/removes the folder of a Resource
#
#
# is responsible for the creation/deletion of resource folders
######################################################################
class addResourcePtfCmd(addPtfCmd):
    """Base command for resources that own a folder under ``to_folder``."""

    def post_do(self, obj):
        """Only runs the parent hook; no extra file copy is done here."""
        super().post_do(obj)

    def pre_undo(self):
        """Delete the folder of the object about to be removed, if any."""
        super().pre_undo()

        # Nothing to clean up without both an undo target and a destination.
        if not (self.object_to_be_deleted and self.to_folder):
            return

        relative_folder = self.object_to_be_deleted.get_relative_folder()
        resolver.delete_object_folder(
            object_folder=relative_folder,
            to_folder=self.to_folder,
        )
665#####################################################################
666#
667# addCollectionPtfCmd: adds/remove a journal
668# a Collection needs a Provider object
669#
670# params: 'coltype', 'title_xml', 'wall',
671# 'pid', 'sid',
672# 'title_tex', 'title_html',
673# 'other_ids' Ex. [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
674#
675# Exception raised:
676# - ValueError if the init params are empty
677# - exceptions.ResourceExists during do if the Collection already exists
678# - exceptions.ResourceDoesNotExist during undo if the Collection does not exist
679#
680######################################################################
class addCollectionPtfCmd(addResourcePtfCmd):
    """Add or remove a Collection by delegating to ``addCollectionDatabaseCmd``.

    No Solr sub-command is attached.
    """

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addCollectionDatabaseCmd(params)

    def set_provider(self, provider):
        """Forward the provider to the database command."""
        self.cmd.set_provider(provider)

    def set_parent(self, parent):
        """Forward the parent to the database command."""
        self.cmd.set_parent(parent)
696#####################################################################
697#
698# addContainerPtfCmd: adds/remove an issue
699# a Container needs a Collection (journal, book-series) that needs a Provider object
700#
701# params: 'year', 'vseries', 'volume', 'number'
702# 'doi','seq',
703#
704# (params common to Container/Article)
705# 'title_xml', 'title_tex', 'title_html', 'lang',
706# 'other_ids' Ex: [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
707# 'abstracts' Ex: [ { 'tag': tag, 'lang': lang, 'value': value } ]
708# 'contributors' Ex: [ { 'first_name': 'John', "corresponding": True...}, ... ]
709# 'kwd_groups' Ex1: [ { 'content_type': content_type, 'lang': lang, 'value': value } ]
710# Ex2: # [ { 'content_type': content_type, 'lang': lang,
711# 'kwds': [ value1, value2,... ] } ]
712#
713# Exception raised:
714# - ValueError if the init params are empty
715# - exceptions.ResourceExists during do if the issue already exists
716# - exceptions.ResourceDoesNotExist during undo if the Container does not exist
717#
718######################################################################
class addContainerPtfCmd(addResourcePtfCmd):
    """Add or remove a Container by delegating to ``addContainerDatabaseCmd``.

    A Solr sub-command (``addContainerSolrCmd``) is attached only when the
    ``xobj`` ctype starts with "book" or equals "lecture-notes".
    """

    def __init__(self, params=None):
        super().__init__(params)
        self.required_params.extend(["xobj"])

        self.cmd = addContainerDatabaseCmd(params)
        if hasattr(self, "xobj") and (
            self.xobj.ctype.startswith("book") or self.xobj.ctype == "lecture-notes"
        ):
            self.sub_cmd = addContainerSolrCmd(params)

        # Filled in pre_undo, consumed in internal_undo.
        self.article_ids = []

    def add_collection(self, collection):
        """Forward the collection to the database command and the sub-command."""
        self.cmd.add_collection(collection)
        if self.sub_cmd:
            self.sub_cmd.add_collection(collection)

    def set_publisher(self, publisher):
        """Accept a publisher for interface compatibility; currently a no-op."""
        pass

    def set_provider(self, provider):
        """Forward the provider to the database command."""
        self.cmd.set_provider(provider)

    @staticmethod
    def _detach_relationships(article):
        """Unlink ``article`` from its Relationships instead of cascading.

        For each Relationship pointing at ``article`` on one side, the row is
        deleted when the other side is already empty; otherwise only the side
        pointing at ``article`` is set to None and the row is saved.
        """
        for relationship in Relationship.objects.filter(resource=article):
            if relationship.related is None:
                relationship.delete()
            else:
                relationship.resource = None
                relationship.save()
        for relationship in Relationship.objects.filter(related=article):
            if relationship.resource is None:
                relationship.delete()
            else:
                relationship.related = None
                relationship.save()

    def pre_undo(self):
        """Collect the article ids and detach their Relationships."""
        # To delete a container directly (cmd=addContainerPtfCmd({'pid':pid,'ctype':ctype}); cmd.undo()
        # and associated set) you simply need to pass its pid AND ctype.
        # addContainerPtfCmd is then responsible for removing the issue and its articles from the system.
        # Django automatically removes all objects related to the container (cascade),
        # but the articles of the container have to be removed manually from Solr:
        # store the article ids in pre_undo and delete the Solr articles in internal_undo.
        #
        # addResourcePtfCmd is responsible for removing the articles' binary files from the system.

        super().pre_undo()
        if self.object_to_be_deleted:
            for article in self.object_to_be_deleted.article_set.all():
                self.article_ids.append(article.id)

                # Exception to the Django cascade mechanism: Relationship.
                # A Relationship links 2 articles.
                # If an article is removed, Django automatically deletes the Relationship.
                # We want the relationship to remain, with the article field set to None.
                self._detach_relationships(article)

    def internal_undo(self):
        """Remove the collected articles from Solr, then undo the container."""
        for article_id in self.article_ids:
            cmd = addArticleSolrCmd({"id": article_id, "solr_commit": False})
            cmd.undo()

        return super().internal_undo()

    def post_undo(self):
        """Run the parent hook, then ``Person.objects.clean()``."""
        super().post_undo()

        Person.objects.clean()
796#####################################################################
797#
798# addArticlePtfCmd: adds/remove an article
799# an Article needs a Container that needs a Collection (Journal) that needs a Provider object
800#
801# params: fpage, lpage, doi, seq, atype (article type), page_range, elocation, article_number, talk_number
802#
803# pseq (parent seq)
804# related_article ?
805#
806# (params common to Container/Article)
807# 'title_xml', 'title_tex', 'title_html', 'lang',
808# 'other_ids' Ex: [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
809# 'abstracts' Ex: [ { 'tag': tag, 'lang': lang, 'value': value } ]
810# 'contributors' Ex: [ { 'first_name': 'John', "corresponding": True...}, ... ]
811# 'kwd_groups' Ex1: [ { 'content_type': content_type, 'lang': lang, 'value': value } ]
812# Ex2: # [ { 'content_type': content_type, 'lang': lang,
813# 'kwds': [ value1, value2,... ] } ]
816#
817# Exception raised:
818# - ValueError if the init params are empty
819# - exceptions.ResourceExists during do if the article already exists
820# - exceptions.ResourceDoesNotExist during undo if the Article does not exist
821#
822######################################################################
class addResourceInSpecialIssuePtfCmd(addResourcePtfCmd):
    """Resource command backed by ``addResourceInSpecialIssueDatabaseCmd``."""

    def __init__(self, params=None):
        super().__init__(params)

        # The database command is built from the same params as this wrapper.
        self.cmd = addResourceInSpecialIssueDatabaseCmd(params)
class addArticlePtfCmd(addResourcePtfCmd):
    """
    Add/remove an Article.

    The database side is delegated to an addArticleDatabaseCmd (self.cmd) and the
    search engine side to an addArticleSolrCmd (self.sub_cmd).
    """

    def __init__(self, params=None):
        super().__init__(params)
        self.cmd = addArticleDatabaseCmd(params)
        # NOTE: a former filter that skipped the search engine for the articles "to appear"
        # (pid starting with settings.ISSUE_TO_APPEAR_PID, except on the "cr" sites)
        # is disabled: a Solr command is created for every article.
        self.sub_cmd = addArticleSolrCmd(params)

    def set_container(self, container):
        """Give the container to the database command and, if any, to the Solr command."""
        self.cmd.set_container(container)
        if self.sub_cmd:
            self.sub_cmd.set_container(container)

    def set_provider(self, provider):
        """Give the provider to the database command only."""
        self.cmd.set_provider(provider)

    def set_eprint(self, eprint):
        """Give the eprint to the Solr command only."""
        self.sub_cmd.set_eprint(eprint)

    def set_source(self, source):
        """Give the source to the Solr command only."""
        self.sub_cmd.set_source(source)

    def set_thesis(self, thesis):
        """Give the thesis to the Solr command only."""
        self.sub_cmd.set_thesis(thesis)

    def add_collection(self, collection):
        """Set the collection of the database command and add it to the Solr command, if any."""
        self.cmd.set_collection(collection)

        if self.sub_cmd:
            self.sub_cmd.add_collection(collection)

    def post_do(self, article):
        """
        Index the translations of the article once the article itself has been added.

        Each XML translation is paired with the translated article created by the
        database command, and indexed with its own addArticleSolrCmd.
        """
        super().post_do(article)

        pairs = zip(self.xobj.translations, self.cmd.translated_articles)
        for xtrans, db_trans in pairs:
            # Work on a copy: the XML object of the translation is left untouched
            solr_xobj = copy.deepcopy(xtrans)
            # The title of the original article is the "translated" title of the translation
            solr_xobj.trans_title_tex = self.xobj.title_tex
            solr_xobj.trans_title_html = self.xobj.title_html
            if article.trans_lang == xtrans.lang and article.trans_title_tex:
                solr_xobj.title_tex = article.trans_title_tex
                solr_xobj.title_html = article.trans_title_html

            # Keep only the abstract written in the language of the translation
            # (the last one if several match)
            matching = [
                abstract
                for abstract in self.xobj.abstracts
                if abstract["tag"] == "abstract" and abstract["lang"] == xtrans.lang
            ]
            if matching:
                solr_xobj.abstracts = [matching[-1]]

            solr_cmd = addArticleSolrCmd({"xobj": solr_xobj})
            solr_cmd.set_container(article.my_container)
            solr_cmd.add_collection(article.get_collection())
            solr_cmd.db_obj = db_trans
            solr_cmd.id = db_trans.id
            solr_cmd.pid = db_trans.pid
            solr_cmd.do()

    def pre_undo(self):
        """
        Detach the Relationship objects from the article about to be deleted.

        A relationship whose other end is already empty is deleted,
        otherwise only the end pointing to the article is reset.
        """
        super().pre_undo()

        target = self.object_to_be_deleted

        for relationship in Relationship.objects.filter(resource=target):
            if relationship.related is None:
                relationship.delete()
            else:
                relationship.resource = None
                relationship.save()

        for relationship in Relationship.objects.filter(related=target):
            if relationship.resource is None:
                relationship.delete()
            else:
                relationship.related = None
                relationship.save()

    def internal_undo(self):
        """
        Remove the article and its translations from the search engine,
        then let the parent class undo the rest. Returns what the parent returns.
        """
        article = self.object_to_be_deleted
        if article:
            addArticleSolrCmd({"id": article.id, "solr_commit": False}).undo()

            for trans_article in article.translations.all():
                addArticleSolrCmd({"id": trans_article.id, "solr_commit": False}).undo()

        return super().internal_undo()
928#####################################################################
929#
930# addBookPartPtfCmd: adds/remove a book part
931#
932# TODO an Article is used to store a book part in the database. Why not use a JournalArticle in SolR ?
933#
934# params: 'year', 'fpage', 'lpage'
935# 'colid' Ex: [ 1,2 ]
936#
937# (params common to Book)
938# 'title_xml', 'title_tex', 'title_html', 'lang',
939# 'other_ids' Ex: [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
940# 'ext_ids' Ex: [ ('zbl-item-id','0216.23901'), ('mr-item-id', '289322') ]
941# 'abstracts' Ex: [ { 'tag': tag, 'lang': lang, 'value': value } ]
942# 'contributors' Ex: [ { 'first_name': 'John', "corresponding": True...}, ... ]
943# 'kwd_groups' Ex1: [ { 'content_type': content_type, 'lang': lang, 'value': value } ]
944# Ex2: # [ { 'content_type': content_type, 'lang': lang,
945# 'kwds': [ value1, value2,... ] } ]
946# 'bibitem' Ex: ["1) Name - Title", "2) Name2 - Title2" ]
947#
948# Exception raised:
949# - ValueError if the init params are empty
950# - exceptions.ResourceExists during do if the book part already exists
951# - exceptions.ResourceDoesNotExist during undo if the BookPart does not exist
952#
953######################################################################
class addBookPartPtfCmd(addResourcePtfCmd):
    """
    Add/remove a book part.

    The book part is stored with an addArticleDatabaseCmd (self.cmd)
    and indexed with an addBookPartSolrCmd (self.sub_cmd).
    """

    def __init__(self, params=None):
        super().__init__(params)

        self.cmd = addArticleDatabaseCmd(params)
        self.sub_cmd = addBookPartSolrCmd(params)

    def set_container(self, container):
        """Give the container to both the database and the Solr commands."""
        for command in (self.cmd, self.sub_cmd):
            command.set_container(container)
        # 'colid' is used to find the collection of a book part
        # TODO store the book_id as well ?

    def add_collection(self, collection):
        """Add the collection to the Solr command, unless it is the MBK collection."""
        # manage collection MBK : only index the other collection
        if collection.pid == "MBK":
            return
        self.sub_cmd.add_collection(collection)
973##########################################################################
974##########################################################################
975#
976# Update Commands
977#
978##########################################################################
979##########################################################################
982#####################################################################
983#
984# updateCollectionPtfCmd: updates a journal
985# a Collection needs a Provider object
986#
987# params: 'coltype', 'title_xml', 'wall',
988# 'pid', 'sid',
989# 'title_tex', 'title_html',
990# 'other_ids' Ex. [ ('cedram-id','AFST'), ('issn', '0240-2963') ]
991#
992# Exception raised:
993# - ValueError if the init params are empty
994# - exceptions.ResourceDoesNotExist during do if the Collection does not exist
995#
996######################################################################
class updateCollectionPtfCmd(addPtfCmd):
    """
    Update a Collection in the database with an updateCollectionDatabaseCmd.

    No search engine sub-command is created: its creation is commented out below.
    """

    def __init__(self, params=None):
        super().__init__(params)

        self.cmd = updateCollectionDatabaseCmd(params)
        # self.sub_cmd = addCollectionSolrCmd(params)

    def set_provider(self, provider):
        """Give the provider to the database command."""
        self.cmd.set_provider(provider)

    def set_publisher(self, publisher):
        """
        Give the publisher to the search engine sub-command, if there is one.

        __init__ does not create a sub-command, so an unguarded call would fail
        on a missing/empty sub_cmd. Same guard as addArticlePtfCmd.set_container.
        """
        sub_cmd = getattr(self, "sub_cmd", None)
        if sub_cmd:
            sub_cmd.set_publisher(publisher)
1011#####################################################################
1012#
1013# updateResourceIdPtfCmd: updates an existing ResourceId
1014# params: 'id_type': 'doi', 'issn', 'e-issn'
1015# 'id_value'
1016#
1017# Needs a Resource object (required)
1018#
1019# Exception raised:
1020# - ValueError if the init params are empty
1021# - exceptions.ResourceDoesNotExist during do if the ResourceId does not exist
1022#
1023######################################################################
class updateResourceIdPtfCmd(addPtfCmd):
    """
    Update an existing ResourceId with an updateResourceIdDatabaseCmd.

    A Resource has to be given with set_resource.
    """

    def __init__(self, params=None):
        # A fresh dict per call: a mutable default argument ({}) is shared by all the calls
        # and any mutation would leak from one command to the next
        if params is None:
            params = {}

        super().__init__(params)

        self.cmd = updateResourceIdDatabaseCmd(params)

    def set_resource(self, resource):
        """Give the resource to the database command."""
        self.cmd.set_resource(resource)
1034#####################################################################
1035#
1036# updateExtLinkPtfCmd: updates an existing ExtLink
1037# params: 'rel': 'website' or 'small_icon'
1038# 'mimetype', 'location', 'metadata', 'seq'
1039#
1040# Needs a Resource object (required)
1041# TODO: update the related XmlBase object
1042#
1043# Exception raised:
1044# - ValueError if the init params are empty
1045# - exceptions.ResourceDoesNotExist during do if the ExtLink does not exist
1046#
1047######################################################################
class updateExtLinkPtfCmd(addPtfCmd):
    """
    Update an existing ExtLink with an updateExtLinkDatabaseCmd.

    A Resource has to be given with set_resource.
    """

    def __init__(self, params=None):
        super().__init__(params)

        # The database command receives the same params as this command
        self.cmd = updateExtLinkDatabaseCmd(params)

    def set_resource(self, resource):
        """Give the resource to the database command."""
        self.cmd.set_resource(resource)
class importExtraDataPtfCmd(baseCmd):
    """
    Restore additional info, such as checked/false_positive attributes on extid/bibitemid

    The data is read from a JSON file found in import_folder.

    results: articles are updated
    """

    def __init__(self, params=None):
        self.pid = None
        self.import_folder = None

        super().__init__(params)

        self.required_params.extend(["pid", "import_folder"])

    def copy_file(self, filename, resource, from_pid):
        """
        Copy an image of the import folder to the folder of the resource.

        The copy only happens when the file is named <from_pid>.<ext>.
        Returns the new relative filename if the file was copied, filename otherwise.
        """
        # Potentially retrieve the image set via ptf-tools for the resource.
        # The image has to be renamed because the logic is to have an image named pid.EXT
        # When an online first article is moved, from_pid may differ from resource.pid
        basename = os.path.basename(filename)
        extension = os.path.splitext(filename)[1]
        if basename != f"{from_pid}{extension}":
            return filename

        renamed = f"{resource.pid}{extension}"
        from_path = os.path.join(self.import_folder, filename)
        new_filename = os.path.join(resource.get_relative_folder(), renamed)
        to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, new_filename)
        resolver.copy_file(from_path, to_path)
        return new_filename

    def import_article_extra_info(self, article, article_data):
        """
        Restore the extra info of one article from its exported dict.

        Nothing is done if article_data is None.
        """
        if article_data is None:
            return

        for extid in article_data["extids"]:
            model_helpers.add_or_update_extid(
                article,
                extid["type"],
                extid["value"],
                extid["checked"],
                extid["false_positive"],
                False,
            )

        for ref_data in article_data["references"]:
            bibitem = model_helpers.get_bibitem_by_seq(article, ref_data["seq"])
            if not bibitem:
                continue
            for bibid in ref_data["bibids"]:
                model_helpers.add_or_update_bibitemid(
                    bibitem,
                    bibid["type"],
                    bibid["value"],
                    bibid["checked"],
                    bibid["false_positive"],
                    False,
                )

        # Dates stored as strings in the JSON file; the article is saved after each one
        for field in ("date_published", "date_pre_published", "date_online_first"):
            if field in article_data:
                setattr(article, field, model_helpers.parse_date_str(article_data[field]))
                article.save()

        if "deployed_date" in article_data:
            deployed_date = model_helpers.parse_date_str(article_data["deployed_date"])
            ptfSite = model_helpers.get_site_mersenne(article.get_top_collection().pid)
            article.deploy(ptfSite, deployed_date)

        if "icon" in article_data:
            location = self.copy_file(article_data["icon"], article, article_data["pid"])
            icon_cmd = addorUpdateExtLinkPtfCmd({"rel": "icon", "location": location})
            icon_cmd.set_resource(article)
            icon_cmd.do()

        # Plain attributes copied as is; the article is saved after each one
        for field in ("show_body", "do_not_publish"):
            if field in article_data:
                setattr(article, field, article_data[field])
                article.save()

        restore_doibatch = (
            settings.SITE_NAME == "ptf_tools"
            and "doi_status" in article_data
            and article_data["doi_status"] != 0
            # restricted to the articles whose pid does not change
            and article.pid == article_data["pid"]
        )
        if restore_doibatch:
            from mersenne_tools.models import DOIBatch
            from ptf_tools.doi import get_doibatch

            # Only create a DOIBatch if the article does not have one yet
            if not get_doibatch(article):
                DOIBatch(
                    resource=article,
                    status=article_data["doi_status"],
                    id=article_data["doibatch_id"],
                    xml=article_data["doibatch_xml"],
                    log="-- import --",
                ).save()

    def import_container_extra_info(self, container, data):
        """
        Restore the extra info of a container, then of each of its exported articles.

        An article is looked up by its DOI first, then by its pid.
        """
        ptfSite = model_helpers.get_site_mersenne(container.my_collection.pid)

        if "deployed_date" in data:
            deployed_date = model_helpers.parse_date_str(data["deployed_date"])
            container.deploy(ptfSite, deployed_date)

        if "icon" in data:
            location = self.copy_file(data["icon"], container, container.pid)
            icon_cmd = addorUpdateExtLinkPtfCmd({"rel": "icon", "location": location})
            icon_cmd.set_resource(container)
            icon_cmd.do()

        for article_data in data["articles"]:
            doi = article_data["doi"]
            article = model_helpers.get_article_by_doi(doi) if doi else None
            if not article:
                article = model_helpers.get_article(article_data["pid"])
            if article:
                self.import_article_extra_info(article, article_data)

    def internal_do(self):
        """
        Load the JSON file of the resource and dispatch it to import_<classname>_extra_info.

        Nothing is imported if the file does not exist or if there is no import method
        for the class of the resource.

        Raises exceptions.ResourceDoesNotExist if no resource has the given pid.
        """
        super().internal_do()

        resource = model_helpers.get_resource(self.pid)
        if not resource:
            raise exceptions.ResourceDoesNotExist(f"Resource {self.pid} does not exist")

        obj = resource.cast()

        classname = obj.classname.lower()
        # The article pid is only part of the filename when importing a single article
        article_pid = self.pid if classname == "article" else None

        container = obj.get_container()
        container_pid = container.pid
        collection = container.my_collection

        file = resolver.get_archive_filename(
            self.import_folder, collection.pid, container_pid, "json", article_pid=article_pid
        )

        if not os.path.exists(file):
            return

        with open(file, encoding="utf-8") as f:
            data = json.load(f)

        ftor = getattr(self, f"import_{classname}_extra_info", None)
        if callable(ftor):
            ftor(obj, data)
1225#####################################################################
1226#
1227# addDjvuPtfCmd: add a Djvu to an existing issue
1228# Used when an issue is sent to Numdam by ptf-tools
1229#
1230# Needs a Resource object (required)
1231#
1232# Exception raised:
1233# - ValueError if the init params are empty
1234#
1235######################################################################
class addDjvuPtfCmd(baseCmd):
    """
    Add a DjVu datastream to a resource that has a PDF but no DjVu yet.

    The DjVu file is created with the external pdf2djvu program if it is not on disk.
    A Resource has to be given (param 'resource' or set_resource).
    """

    def __init__(self, params=None):
        self.resource = None

        # A fresh dict per call: a mutable default argument ({}) is shared by all the calls
        if params is None:
            params = {}

        super().__init__(params)

        self.required_params.extend(["resource"])

    def set_resource(self, resource):
        self.resource = resource

    def convert_pdf_to_djvu(self):
        """
        Create the DjVu file and its datastream; update the search engine for some resources.

        Nothing is done if the resource already has a DjVu datastream or has no PDF.
        Raises subprocess.CalledProcessError if pdf2djvu fails.
        """
        obj = self.resource.cast()
        if obj.datastream_set.filter(mimetype="image/x.djvu").count() != 0:
            # The resource already has a DjVu datastream
            return

        qs = obj.datastream_set.filter(mimetype="application/pdf")
        if qs.count() == 0:
            # No PDF to convert
            return

        datastream = qs.first()
        location = datastream.location.replace(".pdf", ".djvu")

        # The djvu is created in MERSENNE_PROD_DATA_FOLDER (used to archive), except for
        # the collections of settings.NUMDAM_COLLECTIONS: MERSENNE_TEST_DATA_FOLDER is used
        folder = settings.MERSENNE_PROD_DATA_FOLDER
        if (
            hasattr(settings, "NUMDAM_COLLECTIONS")
            and obj.my_container.my_collection.pid in settings.NUMDAM_COLLECTIONS
        ):
            folder = settings.MERSENNE_TEST_DATA_FOLDER

        djvu_filename = os.path.join(folder, location)

        if not os.path.isfile(djvu_filename):
            pdf_filename = os.path.join(folder, datastream.location)
            if not os.path.isfile(pdf_filename):
                # Fall back to the PDF of the test folder
                pdf_filename = os.path.join(
                    settings.MERSENNE_TEST_DATA_FOLDER, datastream.location
                )

            # Argument list and no shell: the filenames are passed verbatim to pdf2djvu,
            # whatever spaces or shell metacharacters they contain
            subprocess.check_output(
                ["pdf2djvu", "--quiet", "--dpi", "600", "--output", djvu_filename, pdf_filename]
            )

            # Copy the new djvu in MERSENNE_TEST_DATA_FOLDER (used to deploy)
            djvu_filename_in_test = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, location)
            if djvu_filename_in_test != djvu_filename:
                resolver.copy_file(djvu_filename, djvu_filename_in_test)

        cmd = addDataStreamDatabaseCmd(
            {
                "rel": "full-text",
                "mimetype": "image/x.djvu",
                "location": location,
                "text": "Full (DJVU)",
                # qs holds the PDF datastreams of the resource
                "seq": qs.count() + 1,
            }
        )
        cmd.set_resource(obj)
        cmd.do()

        # The search engine is updated for the objects without ctype,
        # the books (ctype starting with "book") and the lecture notes
        if (
            not hasattr(obj, "ctype")
            or obj.ctype.startswith("book")
            or obj.ctype == "lecture-notes"
        ):
            self.update_solr(obj, location)

    def update_solr(self, resource, djvu_location):
        """Store the location of the DjVu in the search engine."""
        params = {"djvu": djvu_location}
        cmd = updateResourceSolrCmd(params)
        cmd.set_resource(resource)
        cmd.do()

    # Convert the PDF in Djvu
    def internal_do(self):
        super().internal_do()

        self.convert_pdf_to_djvu()
1318#####################################################################
1319#
1320# addorUpdateContribsPtfCmd: update the list of contributions of a Resource
1321# Remove the existing contributions and replace with the new ones
1322#
1323# Needs a Resource object (required)
1324#
1325# Exception raised:
1326# - ValueError if the init params are empty
1327#
1328######################################################################
class addorUpdateContribsPtfCmd(baseCmd):
    """
    Replace the contributions of a Resource, in the database and in the search engine.

    A Resource has to be given (param 'resource' or set_resource).
    """

    def __init__(self, params=None):
        self.resource = None
        self.contributors = []

        # A fresh dict per call: a mutable default argument ({}) is shared by all the calls
        if params is None:
            params = {}

        super().__init__(params)

        self.required_params.extend(["resource"])

    def set_resource(self, resource):
        self.resource = resource

    def internal_do(self):
        super().internal_do()

        # Remove the existing contributions before adding the new ones
        self.resource.contributions.all().delete()
        add_contributors(self.contributors, self.resource)

        cmd = updateResourceSolrCmd({"contributors": self.contributors})
        cmd.set_resource(self.resource)
        cmd.do()
1352#####################################################################
1353#
1354# addorUpdateKwdsPtfCmd: update the keywords of a Resource
1355# Remove the existing keywords and replace with the new ones
1356#
1357# Needs a Resource object (required)
1358#
1359# TODO: pass a list of kwd_groups instead of separate kwd_<lang> values
1360#
1361# Exception raised:
1362# - ValueError if the init params are empty
1363#
1364######################################################################
1365# class addorUpdateKwdsPtfCmd(baseCmd):
1366# def __init__(self, params={}):
1367# self.resource = None
1368# self.kwds_fr = None
1369# self.kwds_en = None
1370# self.kwd_uns_fr = None
1371# self.kwd_uns_en = None
1372#
1373# super(addorUpdateKwdsPtfCmd, self).__init__(params)
1374#
1375# self.required_params.extend(['resource'])
1376#
1377# def set_resource(self, resource):
1378# self.resource = resource
1379#
1380# def addOrUpdateKwds(self, kwd_uns, kwds, lang):
1381# kwds_groups_qs = self.resource.kwdgroup_set.filter(content_type='', lang=lang)
1382# if kwds_groups_qs.exists():
1383# # There is already a kwd_group.
1384# group = kwds_groups_qs.first()
1385# # First, delete all its kwds
1386# group.kwd_set.all().delete()
1387# group.delete()
1388#
1389# new_kwd_group = None
1390#
1391# if kwd_uns or kwds:
1392# new_kwd_group = {'content_type': '', 'lang': lang, 'kwds': kwds}
1393# if kwd_uns:
1394# new_kwd_group['value_tex'] = kwd_uns
1395# new_kwd_group['value_html'] = kwd_uns
1396# new_kwd_group[
1397# 'value_xml'] = '<unstructured-kwd-group xml:space="preserve">' + kwd_uns + '</unstructured-kwd-group>'
1398# else:
1399# # Build value_tex and value_html for display and SolR
1400# # But do not create value_xml: it is done by the XML export templates (OAI, PubMed)
1401# value = ''
1402# for kwd in kwds:
1403# if value:
1404# value += ', '
1405# value += kwd
1406# new_kwd_group['value_tex'] = value
1407# new_kwd_group['value_html'] = value
1408#
1409# addKwdGroup(new_kwd_group, self.resource)
1410#
1411# return new_kwd_group
1412#
1413# def internal_do(self):
1414# super(addorUpdateKwdsPtfCmd, self).internal_do()
1415#
1416# kwd_groups = []
1417# kwd_group = self.addOrUpdateKwds(self.kwd_uns_fr, self.kwds_fr, 'fr')
1418# if kwd_group:
1419# kwd_groups.append(kwd_group)
1420#
1421# kwd_group = self.addOrUpdateKwds(self.kwd_uns_en, self.kwds_en, 'en')
1422# if kwd_group:
1423# kwd_groups.append(kwd_group)
1424#
1425# cmd = updateResourceSolrCmd({'kwd_groups': kwd_groups})
1426# cmd.set_resource(self.resource)
1427# cmd.do()
1430#####################################################################
1431#
1432# addorUpdateExtLinkPtfCmd: add, update or remove the ExtLink of a Resource for a given 'rel'
1433# An existing ExtLink gets the new location, or is deleted if no location is given
1434#
1435# Needs a Resource object (required)
1436#
1437# Exception raised:
1438# - ValueError if the init params are empty
1439#
1440# TODO: the cover images (the icons) are stored here and therefore do not DIRECTLY benefit from the file copy logic of the RelatedObjects
1441######################################################################
class addorUpdateExtLinkPtfCmd(baseCmd):
    """
    Add, update or remove the ExtLink of a Resource for a given 'rel'.

    - an ExtLink exists and a location is given: its location is updated
    - an ExtLink exists and no location is given: it is deleted
    - no ExtLink exists and a location is given: it is created with addExtLinkPtfCmd

    A Resource (param 'resource' or set_resource) and 'rel' are required.
    """

    def __init__(self, params=None):
        self.resource = None
        self.location = None
        self.rel = None
        self.mimetype = ""

        # A fresh dict per call: a mutable default argument ({}) is shared by all the calls
        if params is None:
            params = {}

        super().__init__(params)

        self.required_params.extend(["resource", "rel"])

    def set_resource(self, resource):
        self.resource = resource

    def internal_do(self):
        super().internal_do()

        # first() returns None when there is no ExtLink: no need for a separate exists() query
        extlink = ExtLink.objects.filter(resource=self.resource, rel=self.rel).first()

        if extlink is not None:
            if self.location:
                extlink.location = self.location
                extlink.save()
            else:
                extlink.delete()
        elif self.location:
            params = {
                "rel": self.rel,
                "mimetype": self.mimetype,
                "location": self.location,
                "seq": 1,
                "metadata": "",
            }

            cmd = addExtLinkPtfCmd(params)
            cmd.set_resource(self.resource)
            cmd.do()
1482#####################################################################
1483#
1484# updateArticlePtfCmd: update an existing Article
1485# Olivier: 12/06/2020. This function needs major refactoring.
1486# If page_count is not provided, it gets deleted.
1487# There should be a way to pass only attributes to edit
1488#
1489# Needs an Article object (required)
1490#
1491# Exception raised:
1492# - ValueError if the init params are empty
1493#
1494######################################################################
class updateArticlePtfCmd(baseCmd):
    """
    Update an existing Article: title, body, authors, page count, PDF datastream, icon.

    An Article has to be given (param 'article' or set_article).
    With use_page_count (the default), the existing page count is removed
    even if no page_count is given.
    """

    def __init__(self, params=None):
        self.article = None
        self.title_xml = None
        self.title_html = None
        self.title_tex = None
        self.authors = None
        self.page_count = None
        self.use_page_count = True
        self.icon_location = None
        self.body = None
        self.body_tex = None
        self.body_html = None
        self.body_xml = None

        # A fresh dict per call: a mutable default argument ({}) is shared by all the calls
        if params is None:
            params = {}

        super().__init__(params)

        self.required_params.extend(["article"])

    def set_article(self, article):
        self.article = article

    def internal_do(self):
        super().internal_do()

        container = self.article.my_container
        collection = container.my_collection

        # The title is only updated when its 3 forms are given
        if self.title_tex and self.title_html and self.title_xml:
            self.article.title_tex = self.title_tex
            self.article.title_html = self.title_html
            self.article.title_xml = self.title_xml
            self.article.save()

        # The 3 forms of the body are replaced as soon as one of them is given
        if self.body_xml or self.body_html or self.body_tex:
            self.article.body_tex = self.body_tex
            self.article.body_html = self.body_html
            self.article.body_xml = self.body_xml
            self.article.save()

        # Authors
        if self.authors:
            params = {"contributors": self.authors}
            cmd = addorUpdateContribsPtfCmd(params)
            cmd.set_resource(self.article)
            cmd.do()

        # Page count
        if self.use_page_count:
            qs = self.article.resourcecount_set.filter(name="page-count")
            if qs.exists():
                qs.first().delete()
            if self.page_count:
                seq = self.article.resourcecount_set.count() + 1
                params = {"name": "page-count", "value": self.page_count, "seq": seq}
                cmd = addResourceCountPtfCmd(params)
                cmd.set_resource(self.article)
                cmd.do()

        # Add a DataStream for the PDF
        qs = self.article.datastream_set.filter(mimetype="application/pdf")
        if not qs.exists():
            folder = resolver.get_relative_folder(collection.pid, container.pid, self.article.pid)
            location = os.path.join(folder, self.article.pid + ".pdf")
            params = {
                "rel": "full-text",
                "mimetype": "application/pdf",
                "location": location,
                "seq": self.article.datastream_set.count() + 1,
                "text": "Full (PDF)",
            }
            cmd = addDataStreamPtfCmd(params)
            cmd.set_resource(self.article)
            cmd.do()

        # Image added via ptf-tools for an article
        if self.icon_location:
            params = {"rel": "icon", "location": self.icon_location}
            cmd = addorUpdateExtLinkPtfCmd(params)
            cmd.set_resource(self.article)
            cmd.do()

        # Kwds: the update of the keywords is disabled (addorUpdateKwdsPtfCmd is commented out)

        # Update the search engine with the new body and/or title
        if self.body or self.title_tex:
            params = {}
            if self.body:
                params["body"] = self.body
            if self.title_tex and self.title_html:
                params["title_tex"] = self.title_tex
                params["title_html"] = self.title_html

            cmd = updateResourceSolrCmd(params)
            cmd.set_resource(self.article)
            cmd.do()
1603#####################################################################
1604#
1605# updateContainerPtfCmd: update an existing Container
1606#
1607# Needs a Container object (required)
1608#
1609# Exception raised:
1610# - ValueError if the init params are empty
1611#
1612######################################################################
class updateContainerPtfCmd(baseCmd):
    """
    Update the icon of an existing Container with an addorUpdateExtLinkPtfCmd.

    A Resource has to be given (param 'resource' or set_resource).
    The icon ExtLink is removed by addorUpdateExtLinkPtfCmd if icon_location is empty.
    """

    def __init__(self, params=None):
        self.resource = None
        self.icon_location = None

        # A fresh dict per call: a mutable default argument ({}) is shared by all the calls
        if params is None:
            params = {}

        super().__init__(params)

        self.required_params.extend(["resource"])

    def set_resource(self, resource):
        self.resource = resource

    def internal_do(self):
        super().internal_do()

        params = {"rel": "icon", "location": self.icon_location}
        cmd = addorUpdateExtLinkPtfCmd(params)
        cmd.set_resource(self.resource)
        cmd.do()
1634##########################################################################
1635##########################################################################
1636#
1637# Export Commands
1638#
1639##########################################################################
1640##########################################################################
1643class exportExtraDataPtfCmd(baseCmd):
1644 """
1645 Exports additional info, such as checked/false_positive attributes on extid/bibitemid
1647 force_pid is only used when the volume to be published becomes published
1648 Ex: AIF_0_0 becomes AIF_2018. We want to backup data in AIF_2018.json
1649 so that additional are restored when AIF_2018.xml is read
1651 export_all export all extids.
1652 If you want to archive, export_all should be False (checked extids are in the XML)
1653 If you want to store in a temp file (updateXML), then export_all should be True
1654 to preserve new extids found by the matching an not present in the XML
1656 if with_binary_files = True, copy in tempFolder, binary files set by ptf-tools ( extlink(rel='icon') )
1658 results: a json file on disk
1659 """
def __init__(self, params=None):
    """
    Set the default values of the command, then let the parent handle params.

    'pid' and 'export_folder' are added to the required params.
    """
    self.pid = self.export_folder = self.force_pid = None
    # By default, everything is exported, binary files included
    self.export_all = self.with_binary_files = True

    super().__init__(params)

    self.required_params.extend(["pid", "export_folder"])
def get_article_extra_info(self, article, export_all=False):
    """
    Build the dict of the extra info of an article.

    Without export_all, only the ids that are unchecked or false positive are exported.
    With self.with_binary_files, the icon files of the article are copied
    to self.export_folder.

    Returns None if the article has no exported id, no date_published,
    no date_online_first and no icon.
    """

    def is_exported(item):
        return export_all or not item.checked or item.false_positive

    def to_dict(item):
        return {
            "type": item.id_type,
            "value": item.id_value,
            "checked": item.checked,
            "false_positive": item.false_positive,
        }

    extids_data = [to_dict(extid) for extid in article.extid_set.all() if is_exported(extid)]

    # Only the references with at least one exported id are kept
    references_data = []
    for bib in article.bibitem_set.all():
        bibids_data = [to_dict(bibid) for bibid in bib.bibitemid_set.all() if is_exported(bibid)]
        if bibids_data:
            references_data.append({"seq": bib.sequence, "bibids": bibids_data})

    icon = None
    for extlink in article.extlink_set.filter(rel="icon"):
        if self.with_binary_files is not True:
            continue
        icon = extlink.location

        # copy the images associated via ptf-tools
        from_path = os.path.join(settings.RESOURCES_ROOT, extlink.location)
        to_path = os.path.join(self.export_folder, extlink.location)
        resolver.create_folder(os.path.dirname(to_path))
        resolver.copy_file(from_path, to_path)

    has_extra_info = (
        extids_data
        or references_data
        or article.date_published
        or article.date_online_first
        or icon
    )
    if not has_extra_info:
        return None

    data = {
        "pid": article.pid,
        "doi": article.doi,
        "extids": extids_data,
        "references": references_data,
    }

    if not export_all:
        return data

    # Everything below is only exported with export_all
    if icon:
        data["icon"] = icon

    for field in ("date_published", "date_pre_published", "date_online_first"):
        value = getattr(article, field)
        if value:
            data[field] = value

    data["show_body"] = article.show_body
    data["do_not_publish"] = article.do_not_publish

    # The DOI batch is only exported on ptf_tools, outside of the tests
    if settings.SITE_NAME == "ptf_tools" and not (
        (len(sys.argv) > 1 and sys.argv[1] == "test") or "pytest" in sys.modules
    ):
        try:
            doibatch = article.doibatch
            data["doi_status"] = doibatch.status
            data["doibatch_id"] = doibatch.id
            data["doibatch_xml"] = doibatch.xml
        except ObjectDoesNotExist:
            data["doi_status"] = 0

    return data
def get_container_extra_info(self, container, export_all=False):
    """
    Build the dict of the extra info of a container and of its articles.

    The result always has the "pid" and "articles" keys; "deployed_date" and "icon"
    are optional. The articles without extra info are left out.
    """
    result = {"pid": container.pid}

    ptfSite = model_helpers.get_site_mersenne(container.my_collection.pid)

    # With self.force_pid, an article is moved from 0_0_0 to its final issue:
    # the deployed_date of the 0_0_0 is not kept
    if ptfSite and not self.force_pid:
        deployed_date = container.deployed_date(ptfSite)
        if deployed_date:
            result["deployed_date"] = deployed_date

    icon = None
    for extlink in container.extlink_set.filter(rel="icon"):
        icon = extlink.location
        if self.with_binary_files is not True:
            continue
        # copy the images associated via ptf-tools
        from_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, extlink.location)
        to_path = os.path.join(self.export_folder, extlink.location)
        resolver.create_folder(os.path.dirname(to_path))
        resolver.copy_file(from_path, to_path)

    if export_all and icon:
        result["icon"] = icon

    all_articles_data = (
        self.get_article_extra_info(article, export_all)
        for article in container.article_set.all()
    )
    result["articles"] = [data for data in all_articles_data if data]

    return result
def internal_do(self):
    """
    Dump the extra (internal) data of the resource ``self.pid`` to a JSON file.

    The data is built by the ``get_<classname>_extra_info`` method matching the
    class of the resource (e.g. ``get_article_extra_info`` or
    ``get_container_extra_info``) and written to the path returned by
    ``resolver.get_archive_filename`` inside ``self.export_folder``.

    :raises exceptions.ResourceDoesNotExist: if no resource matches ``self.pid``
    :raises ValueError: if the resource type has no ``get_<classname>_extra_info``
    """
    super().internal_do()

    resource = model_helpers.get_resource(self.pid)
    if not resource:
        raise exceptions.ResourceDoesNotExist(f"Resource {self.pid} does not exist")

    obj = resource.cast()
    classname = obj.classname.lower()

    # Look the handler up before touching the file system, so that an unsupported
    # resource type fails with an explicit message instead of a
    # "'NoneType' object is not callable" TypeError.
    extra_info_getter = getattr(self, f"get_{classname}_extra_info", None)
    if extra_info_getter is None:
        raise ValueError(
            f"Extra data can not be exported for resource {self.pid} "
            f"of type {obj.classname}"
        )

    # Only an article export needs the article pid in the file name.
    article_pid = self.pid if classname == "article" else None

    container = obj.get_container()
    # self.force_pid, when set, replaces the pid of the container in the file name.
    container_pid = self.force_pid if self.force_pid else container.pid
    collection = container.get_top_collection()

    data = extra_info_getter(obj, self.export_all)

    file = resolver.get_archive_filename(
        self.export_folder,
        collection.pid,
        container_pid,
        "json",
        do_create_folder=True,
        article_pid=article_pid,
    )

    with open(file, "w", encoding="utf-8") as f:
        # myconverter is handed to json.dump as the fallback for values that are
        # not natively serializable (presumably dates - confirm in myconverter).
        json.dump(data, f, default=myconverter)
class exportPtfCmd(baseCmd):
    """
    Generate the XML of an Article, a Container or a Collection.

    The XML is written on disk when export_folder is given as parameter.
    Binary files are copied as well when with_binary_files is True.
    results: the XML, as a unicode string
    """

    def __init__(self, params=None):
        self.pid = None
        self.with_body = True
        self.with_djvu = True  # No djvu in Mersenne web sites
        self.article_standalone = False  # PCJ editor sets to True

        # Also export the JSON of the internal data (false_ids...).
        # This requires self.export_folder to be set.
        self.with_internal_data = False

        # Copy the binary files (PDF...) into export_folder
        self.with_binary_files = False

        self.export_folder = None

        # Source folder the binary files are copied from
        self.binary_files_folder = settings.RESOURCES_ROOT

        # Whether internal metadata (deployed_date) is added to the XML
        self.for_archive = False

        # Eventually allows to leave out the articles flagged as "do not publish"
        self.export_to_website = False

        # The XML shown in the export tab does not have all the metadata
        self.full_xml = True

        super().__init__(params)

        self.required_params.extend(["pid"])

    def internal_do(self):
        """
        Render the resource ``self.pid`` as XML and return it as a string.

        When ``self.export_folder`` is set, the XML is also written in that folder
        (the file name comes from ``resolver.get_archive_filename``), the internal
        data of a Container is exported as JSON if ``self.with_internal_data`` is
        set, and binary files are copied if ``self.with_binary_files`` is set.

        :raises exceptions.ResourceDoesNotExist: if no resource matches ``self.pid``
        :raises ValueError: if the resource is not an Article, Container or Collection
        """
        super().internal_do()

        # sites=False was removed from this call: if it breaks, check the import of
        # the collection in the database.
        resource = model_helpers.get_resource(self.pid)
        if not resource:
            raise exceptions.ResourceDoesNotExist(f"Resource {self.pid} does not exist")

        obj = resource.cast()
        kind = obj.classname

        # export Book ? need a visitor ? see oai_helpers
        if kind == "Article":
            template_name, item_name = "oai/common-article_eudml-article2.xml", "article"
        elif kind == "Container":
            # Special issues currently share the template of regular issues.
            if obj.ctype in ("issue", "issue_special"):
                template_name, item_name = "oai/common-issue_eudml-article2.xml", "container"
            else:
                template_name, item_name = "oai/book_bits.xml", "book"
        elif kind == "Collection":
            template_name, item_name = "collection.xml", "collection"
        else:
            raise ValueError("Only articles, containers or collections can be exported")

        if self.export_folder and self.with_internal_data and kind == "Container":
            extra_data_params = {
                "pid": self.pid,
                "export_folder": self.export_folder,
                "with_binary_files": self.with_binary_files,
            }
            exportExtraDataPtfCmd(extra_data_params).do()

        provider = model_helpers.get_provider("mathdoc-id")
        context = {
            item_name: obj,
            "no_headers": True,
            "provider": provider.name,
            "with_body": self.with_body,
            "with_djvu": self.with_djvu,
            "for_disk": True,
            "for_export": not self.for_archive,
            "full_xml": self.full_xml,
            "export_to_website": self.export_to_website,
            "article_standalone": self.article_standalone,
        }
        xml_body = str(render_to_string(template_name, context))

        if not self.full_xml:
            # Round-trip through lxml to drop the comments and pretty-print the XML.
            lenient_parser = etree.XMLParser(
                huge_tree=True,
                recover=True,
                remove_blank_text=False,
                remove_comments=True,
                resolve_entities=True,
            )
            root = etree.fromstring(xml_body.encode("utf-8"), parser=lenient_parser)
            xml_body = etree.tostring(root, pretty_print=True).decode("utf-8")

        if not self.export_folder:
            return xml_body

        def write_xml(collection_pid, container_pid):
            # Resolve the archive file name (last argument True, as in every call of
            # this method) and write the XML there.
            xml_path = resolver.get_archive_filename(
                self.export_folder, collection_pid, container_pid, "xml", True
            )
            with open(xml_path, "w", encoding="utf-8") as xml_file:
                xml_file.write(xml_body)

        if kind == "Collection":
            # Export of a collection XML: we don't attempt to write in the top collection
            write_xml(obj.pid, None)

            if self.with_binary_files:
                resolver.copy_binary_files(obj, self.binary_files_folder, self.export_folder)

        elif kind == "Container":
            top_collection = obj.get_top_collection()
            write_xml(top_collection.pid, obj.pid)

            if self.with_binary_files:
                resolver.copy_binary_files(obj, self.binary_files_folder, self.export_folder)

                articles = obj.article_set.all()
                if self.for_archive:
                    articles = articles.exclude(do_not_publish=True)
                for article in articles:
                    resolver.copy_binary_files(
                        article, self.binary_files_folder, self.export_folder
                    )

        elif kind == "Article":
            # The file name is resolved from the top collection pid only: no
            # container pid is passed for an article.
            top_collection = obj.get_top_collection()
            write_xml(top_collection.pid, None)

        return xml_body
class publishResourcePtfCmd(addPtfCmd):
    """
    Publish a resource.

    Publish a container <=> Create a pub-date for all articles/book-parts of the container
    Publish an article <=> Create a pub-date
    """

    def __init__(self, params=None):
        # Keep the raw params: set_resource passes them to the database command
        # it creates.
        self.params = params
        super().__init__(params)

    def set_resource(self, resource):
        """
        Create the database command that matches the type of the resource.

        A resource whose classname contains "Article" gets a
        publishArticleDatabaseCmd; any other resource is handled as a container
        with a publishContainerDatabaseCmd. The command is stored in self.cmd.
        """
        obj = resource.cast()

        if "Article" not in obj.classname:
            self.cmd = publishContainerDatabaseCmd(self.params)
            self.cmd.set_container(obj)
            return

        self.cmd = publishArticleDatabaseCmd(self.params)
        self.cmd.set_article(obj)
def get_or_create_publisher(name):
    """
    Return the publisher called ``name``, creating it first if it does not exist.

    :param name: name of the publisher to look up with model_helpers.get_publisher
    :return: the existing publisher, or the result of addPublisherPtfCmd(...).do()
        when a new one had to be created
    """
    existing = model_helpers.get_publisher(name)
    if existing is not None:
        return existing

    # No publisher with that name yet: describe it and add it.
    xpublisher = PublisherData()
    xpublisher.name = name
    return addPublisherPtfCmd({"xobj": xpublisher}).do()