Coverage for src/ptf/cmds/solr_cmds.py: 84%
468 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1import pysolr
3from django.conf import settings
5from ptf.cmds.base_cmds import baseCmd
6from ptf.cmds.base_cmds import make_int
7from ptf.display import resolver
8from ptf.site_register import SITE_REGISTER
9from ptf.solr import search_helpers
10from ptf.utils import get_display_name
12# Not used so far.
13# nlm2solr use normalize-space for volume and volume-series,
14# but make_int is called to convert into int: spaces are also trimmed
15# def normalize_whitespace(str):
16# import re
17# str = str.strip()
18# str = re.sub(r'\s+', ' ', str)
19# return str
class solrFactory:
    """Holder of a single, lazily created ``pysolr.Solr`` client.

    The client and its URL are stored as class attributes, so every caller
    of :meth:`get_solr` shares the same connection object.
    """

    solr = None
    solr_url = None

    @staticmethod
    def get_solr():
        """Return the shared client, creating it on first use.

        The URL defaults to ``settings.SOLR_URL`` unless ``solr_url`` was
        set beforehand.
        """
        if solrFactory.solr is not None:
            return solrFactory.solr

        if solrFactory.solr_url is None:
            solrFactory.solr_url = settings.SOLR_URL
        solrFactory.solr = pysolr.Solr(solrFactory.solr_url, timeout=10)
        return solrFactory.solr

    @staticmethod
    def do_solr_commit():
        """Commit pending changes, unless ``settings.IGNORE_SOLR`` is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        solrFactory.get_solr().commit()

    @staticmethod
    def do_solr_rollback():
        """Send a rollback message, unless ``settings.IGNORE_SOLR`` is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        # The rollback is posted as a raw update message through the
        # client's private ``_update`` method.
        solrFactory.get_solr()._update("<rollback />")

    @staticmethod
    def reset():
        """Close the current client's session (if any) and forget the client."""
        client = solrFactory.solr
        if client:
            client.get_session().close()
        solrFactory.solr = None
def solr_add_contributors_to_data(contributors, data):
    """Fill the author-related keys of ``data`` from ``contributors``.

    Only contributors whose ``role`` is author, editor or translator are
    used. Writes ``au`` (display names joined with "; "), ``ar`` (the
    ``mid`` when set, otherwise the display name), ``aul`` (last names) and,
    when at least one display name exists, ``fau`` (the first one).
    ``data`` is left untouched when ``contributors`` is ``None``.
    """
    if contributors is None:
        return

    display_names = []
    reference_names = []
    last_names = []

    for contributor in contributors:
        if contributor["role"] not in ["author", "editor", "translator"]:
            continue

        display_name = get_display_name(
            "",
            contributor["first_name"],
            contributor["last_name"],
            "",
            contributor["string_name"],
        )
        reference_name = contributor["mid"] or display_name

        if reference_name:
            reference_names.append(reference_name)
        if display_name:
            display_names.append(display_name)
        if contributor["last_name"]:
            last_names.append(contributor["last_name"])

    data["au"] = "; ".join(display_names)
    # Names used for the author references
    data["ar"] = reference_names
    # Surnames / last names
    data["aul"] = last_names

    if display_names:
        data["fau"] = display_names[0]
def solr_add_kwds_to_data(kwds, data):
    """Split ``kwds`` into the ``kwd``, ``trans_kwd`` and ``msc`` keys of ``data``.

    Keywords of type "msc" go, upper-cased, into the ``msc`` list. The other
    keywords are joined with ", " into ``kwd`` when their language is "fr"
    and into ``trans_kwd`` otherwise.
    """
    french = []
    translated = []
    msc_codes = []

    for kwd in kwds:
        if kwd["type"] == "msc":
            msc_codes.append(kwd["value"].upper())
        elif kwd["lang"] == "fr":
            french.append(kwd["value"])
        else:
            translated.append(kwd["value"])

    data["kwd"] = ", ".join(french)
    data["trans_kwd"] = ", ".join(translated)
    data["msc"] = msc_codes
98#####################################################################
99#
100# solrCmd: base class for Solr commands
101#
102######################################################################
class solrCmd(baseCmd):
    """Base class of the Solr commands.

    ``do`` and ``undo`` return ``None`` without doing anything when
    ``settings.IGNORE_SOLR`` is set and truthy; otherwise they defer to
    ``baseCmd``.
    """

    def __init__(self, params={}):
        super().__init__(params)

    def do(self, parent=None):
        """Run the command, unless Solr is ignored by the settings."""
        if getattr(settings, "IGNORE_SOLR", False):
            return None

        return super().do(parent)

    def post_do(self, resource=None):
        super().post_do(resource)

    def undo(self):
        """Undo the command, unless Solr is ignored by the settings."""
        if getattr(settings, "IGNORE_SOLR", False):
            return None

        return super().undo()
123#####################################################################
124#
125# solrDeleteCmd: generic to delete Solr documents, based on a query
126#
127######################################################################
class solrDeleteCmd(solrCmd):
    """Delete the Solr documents matching the query ``q`` (required param)."""

    def __init__(self, params={}):
        # Defaults are assigned before the base constructor runs.
        self.commit = True
        self.q = None

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self):
        """Issue the delete-by-query; always returns ``None``."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.delete(q=self.q, commit=self.commit)

        return None
145#####################################################################
146#
147# solrAddCmd: base class for Solr Add commands
148#
149######################################################################
class solrAddCmd(solrCmd):
    """Base class of the commands that add one document to Solr.

    ``id`` and ``pid`` are required to run the command; ``id`` is required
    to delete (undo) it. The document sent to Solr is ``self.data``.
    """

    def __init__(self, params={}):
        # Defaults are assigned before the base constructor runs.
        self.commit = True
        self.db_obj = None
        self.id = None
        self.pid = None
        self.data = {}

        super().__init__(params)

        self.required_params.extend(["id", "pid"])
        self.required_delete_params.extend(["id"])

    def pre_do(self):
        """Store ``id``/``pid`` in the document and purge existing ones."""
        super().pre_do()

        self.data.update({"id": self.id, "pid": self.pid})

        # Sometimes, after errors and/or simultaneous uploads, several
        # records exist for one PID. To avoid several search results for a
        # PID, everything is deleted before internal_do.
        solrDeleteCmd({"q": "pid:" + self.pid}).do()

    def internal_do(self):
        """Add ``self.data`` to Solr; always returns ``None``."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.add(docs=[self.data], commit=self.commit)

        return None

    def internal_undo(self):
        """Delete the document by ``id`` and return the parent's result."""
        undone_id = super().internal_undo()

        client = solrFactory.get_solr()
        client.delete(id=self.id, commit=self.commit)

        return undone_id
190#####################################################################
191#
192# addResourceSolrCmd: base class for solrAddCmds adding a Resource
193#
194######################################################################
class addResourceSolrCmd(solrAddCmd):
    """Base class of the add commands that index a resource.

    The resource is described by ``xobj`` (required param). Its attributes
    listed in ``self.fields`` are copied into the Solr document by
    ``pre_do``.
    """

    def __init__(self, params={}):
        self.xobj = None  # model_data object

        # Attributes of xobj that are passed to Solr when present
        self.fields = [
            "lang",
            "doi",
            "title_tex",
            "title_html",
            "trans_title_tex",
            "trans_title_html",
            "abstract_tex",
            "abstract_html",
            "trans_abstract_tex",
            "trans_abstract_html",
            "collection_title_tex",
            "collection_title_html",
            "collection_id",
            "year",
            "body",
            "bibitem",
        ]

        # Used to filter the articles based on their site
        self.sites = None

        super().__init__(params)

        self.required_params.extend(["xobj"])

    def add_collection(self, collection):
        """Record the collection id and titles, and derive ``dt``/``classname``."""
        self.data["collection_id"] = collection.id

        self.data.setdefault("collection_title_tex", []).append(collection.title_tex)
        self.data.setdefault("collection_title_html", []).append(collection.title_html)

        # Journals and acta only get a document type.
        dt_only_labels = {
            "journal": "Article de revue",
            "acta": "Acte de séminaire",
        }
        # The other collection types also get a classname
        # (classname is used only by PCJ for the article types).
        classified_labels = {
            "thesis": "Thèse",
            "lecture-notes": "Notes de cours",
            "proceeding": "Acte de rencontre",
        }

        coltype = collection.coltype
        if coltype in dt_only_labels:
            self.data["dt"] = [dt_only_labels[coltype]]
        else:
            label = classified_labels.get(coltype, "Livre")
            self.data["classname"] = label
            self.data["dt"] = [label]

    def add_abstracts_to_data(self):
        """Copy the tex/html value of each abstract of ``xobj`` into the document.

        French abstracts go to ``abstract_*``; the others to ``trans_abstract_*``.
        """
        for abstract in self.xobj.abstracts:
            prefix = "" if abstract["lang"] == "fr" else "trans_"

            for fmt in ("tex", "html"):
                self.data[f"{prefix}abstract_{fmt}"] = abstract[f"value_{fmt}"]

    def add_year_to_data(self, year):
        """Set ``year_facet``; for a "first-second" value the second part is used."""
        if not year:
            return

        parts = str(year).split("-")
        self.data["year_facet"] = int(parts[1]) if len(parts) > 1 else int(year)

    def pre_do(self):
        """Build the Solr document from ``xobj`` (and ``db_obj`` when set).

        Raises:
            ValueError: if no ``dt`` has been set in the document.
        """
        super().pre_do()

        xobj = self.xobj
        for name in self.fields:
            if hasattr(xobj, name):
                self.data[name] = getattr(xobj, name)

        self.add_abstracts_to_data()
        solr_add_kwds_to_data(xobj.kwds, self.data)
        solr_add_contributors_to_data(xobj.contributors, self.data)

        if "dt" not in self.data:
            raise ValueError(f"add SolR resource without dt - {xobj.pid}")

        # year either comes directly from xobj (container) or from set_container
        self.add_year_to_data(self.data["year"])

        db_obj = self.db_obj
        if db_obj is not None:
            # Solr field receiving the href of each supported stream mimetype
            solr_fields = {
                "application/pdf": "pdf",
                "image/x.djvu": "djvu",
                "application/x-tex": "tex",
            }
            for stream in xobj.streams:
                mimetype = stream["mimetype"]
                if mimetype not in solr_fields:
                    continue
                self.data[solr_fields[mimetype]] = db_obj.get_binary_file_href_full_path(
                    "self", mimetype, stream["location"]
                )

            self.data["wall"] = db_obj.get_wall()

        self.data["sites"] = self.sites if self.sites else [settings.SITE_ID]
317#####################################################################
318#
319# addContainerSolrCmd: adds/remove a container (issue/book)
320#
321# A container needs a collection (collection_title_tex etc.)
322#
323######################################################################
class addContainerSolrCmd(addResourceSolrCmd):
    """Add command for a container (issue/book).

    On top of the fields handled by ``addResourceSolrCmd``, the document
    receives ``ctype`` and the integer forms of ``volume``, ``number`` and
    ``vseries``.
    """

    def __init__(self, params=None):
        # A fresh dict per call replaces the former shared mutable default.
        super().__init__({} if params is None else params)

        self.fields.extend(["ctype"])

    def pre_do(self):
        """Complete the document with the numbering of the container."""
        super().pre_do()

        # Each numbering attribute is stored under its own key. It used to
        # be written to "volume" every time, so the last attribute present
        # overwrote the others.
        for field in ("volume", "number", "vseries"):
            if hasattr(self.xobj, field):
                self.data[field] = make_int(getattr(self.xobj, field))

        # When the container belongs to a collection, the numbering of the
        # first "incollection" entry replaces the values set above.
        if hasattr(self.xobj, "incollection") and len(self.xobj.incollection) > 0:
            incol = self.xobj.incollection[0]
            self.data["vseries"] = make_int(incol.vseries)
            self.data["volume"] = 0
            self.data["number"] = make_int(incol.volume)
348#####################################################################
349#
350# addArticleSolrCmd: adds/remove an article
351#
352# an article needs a container (container_id) that needs a collection (collection_id)
353#
354######################################################################
class addArticleSolrCmd(addResourceSolrCmd):
    """Add command for an article.

    The container data is supplied through ``set_container``; ``pre_do``
    adds the ``classname`` and a display-ready ``page_range``.
    """

    def __init__(self, params=None):
        # A fresh dict per call replaces the former shared mutable default.
        super().__init__({} if params is None else params)

        self.fields.extend(
            ["page_range", "container_id", "volume", "number", "vseries", "article_number"]
        )

    def set_container(self, container):
        """Copy the id, year and numbering of ``container`` into the document."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["vseries"] = make_int(container.vseries)
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)

    def set_eprint(self, eprint):
        """Tag the document as an e-print (``dt`` must already be set)."""
        self.data["dt"].append("e-print")

    def set_source(self, source):
        pass

    def set_thesis(self, thesis):
        """Tag the document as a thesis (``dt`` must already be set)."""
        self.data["dt"].append("thesis")

    def set_original_article(self, article):
        # TODO Replace some data (ie doi, pid) with the original article
        pass

    @staticmethod
    def _build_page_range(page_range, fpage, lpage):
        """Return the text stored in the ``page_range`` field.

        Without an explicit ``page_range`` the text is "p. " followed by
        the first and last pages. An explicit range is prefixed with "p. "
        unless it already starts with "p".
        """
        if not page_range:
            text = "p. "
            if fpage is not None:
                text += fpage
            if fpage and lpage:
                text += "-"
            if lpage is not None:
                text += lpage
            return text

        if page_range[0] != "p":
            return "p. " + page_range

        # Already prefixed: keep it as is. This case used to leave the
        # field empty.
        return page_range

    def pre_do(self):
        """Complete the document with the article type and page range."""
        super().pre_do()

        self.data["classname"] = resolver.ARTICLE_TYPES.get(
            self.xobj.atype, "Article de recherche"
        )

        self.data["page_range"] = self._build_page_range(
            self.xobj.page_range, self.xobj.fpage, self.xobj.lpage
        )
406#####################################################################
407#
408# addBookPartSolrCmd: adds/removes a book part (similar to an article)
409#
410# a book part needs a collection id (array)
411#
412######################################################################
class addBookPartSolrCmd(addResourceSolrCmd):
    """Add command for a book part (similar to an article).

    The container data is supplied through ``set_container``; ``pre_do``
    sets the ``classname`` and a display-ready ``page_range``.
    """

    def __init__(self, params=None):
        # A fresh dict per call replaces the former shared mutable default.
        super().__init__({} if params is None else params)

        self.fields.extend(
            ["page_range", "container_title_tex", "container_title_html", "volume", "number"]
        )

    def set_container(self, container):
        """Copy the id, year, numbering and titles of ``container``."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)
        self.data["container_title_tex"] = container.title_tex
        self.data["container_title_html"] = container.title_html

    @staticmethod
    def _build_page_range(page_range, fpage, lpage):
        """Return the text stored in the ``page_range`` field.

        Without an explicit ``page_range`` the text is "p. " followed by
        the first and last pages. An explicit range is prefixed with "p. "
        unless it already starts with "p".
        """
        if not page_range:
            text = "p. "
            if fpage is not None:
                text += fpage
            if fpage and lpage:
                text += "-"
            if lpage is not None:
                text += lpage
            return text

        if page_range[0] != "p":
            return "p. " + page_range

        # Already prefixed: keep it as is. This case used to leave the
        # field empty.
        return page_range

    def pre_do(self):
        """Complete the document with the class name and page range."""
        super().pre_do()

        self.data["classname"] = "Chapitre de livre"

        self.data["page_range"] = self._build_page_range(
            self.xobj.page_range, self.xobj.fpage, self.xobj.lpage
        )
448#####################################################################
449#
450# solrSearchCmd:
451#
452# called from ptf/views.py; SolrRequest(request, q, alias=alias,
453# site=site,
454# default={'sort': '-score'})
455#
456# Warning: As of July 2018, only 1 site id is stored in a SolR document
457# Although the SolR schema is already OK to store multiple sites ("sites" is an array)
458# no Solr commands have been written to add/remove sites
459# We only have add commands.
460# Search only works if the Solr instance is meant for individual or ALL sites
461#
462######################################################################
class solrSearchCmd(solrCmd):
    """Run a faceted, highlighted search built from a list of criteria.

    ``qs`` (required param) is a list of dicts with the keys ``name``,
    ``value``, ``not``, ``first`` and ``last``. Each criterion is turned
    into a query fragment by :meth:`get_q`.
    """

    # Solr field prefix emitted for the criterion names that have one.
    _FIELD_PREFIXES = {
        "author": "au:",
        "author_ref": "ar:",
        "title": "title_tex:",
        "body": "body:",
        "references": "bibitem:",
        "abstract": "trans_abstract_tex:",
    }

    def __init__(self, params=None):
        self.q = ""
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = "-score"  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""

        # A fresh dict per call replaces the former shared mutable default.
        super().__init__({} if params is None else params)

        self.required_params.extend(["qs"])

    def get_q(self, name, value, exclude, first, last):
        """Return the query fragment of one search criterion.

        Args:
            name: criterion name ("all", "date", "author", "kwd", ...).
            value: searched text; "*" stands for "anything".
            exclude: when truthy, the fragment is negated with a leading "-".
            first, last: bounds of the year range, used when ``name`` is "date".
        """
        if name == "all" and value == "*":
            return "*:*"

        if value == "*":
            value = ""

        q = "-" if exclude else ""

        if name == "date":
            return q + "year:[" + first + " TO " + last + "]"

        q += self._FIELD_PREFIXES.get(name, "")

        if len(value) > 0 and value[0] == '"' and value[-1] == '"':
            # Quoted phrase: passed through untouched
            q += value
        elif name == "kwd":
            joined = " AND ".join(value.split())
            q += "(kwd:(" + joined + ") OR trans_kwd:(" + joined + "))"
        else:
            q += "(" + " AND ".join(value.split()) + ")"

        return q

    def _build_sort(self):
        """Translate ``self.sort`` ("-a,b") into a Solr sort ("a desc, b asc, ...")."""
        if not self.sort:
            return "score desc"

        sorts = []
        for spec in self.sort.split(","):
            spec = spec.strip()
            if not spec:
                # Tolerate a trailing or doubled comma instead of failing
                # on spec[0].
                continue
            if spec.startswith("-"):
                sorts.append(f"{spec[1:]} desc")
            else:
                sorts.append(f"{spec} asc")
        sorts.append("year desc")
        return ", ".join(sorts)

    def internal_do(self) -> search_helpers.SearchResults:
        """Build the query, filters and facets, then run the search."""
        super().internal_do()

        if settings.COLLECTION_PID == "CR":
            cr_ids = ["CRMATH", "CRMECA", "CRPHYS", "CRCHIM", "CRGEOS", "CRBIOL"]
            ids = [SITE_REGISTER[item.lower()]["site_id"] for item in cr_ids]
            self.filters.append(f"sites:[{min(ids)} TO {max(ids)}]")
        elif settings.COLLECTION_PID != "ALL":
            self.filters.append(f"sites:{settings.SITE_ID}")

        sort = self._build_sort()

        use_ar_facet = True
        q = ""
        qt = []
        if self.qs:
            for qi in self.qs:
                qt.append(qi["name"])
                if qi["name"] == "author_ref":
                    # The "ar" facet is dropped when the search already
                    # targets an author reference.
                    use_ar_facet = False
                if qi["value"] or qi["first"]:
                    new_q = self.get_q(qi["name"], qi["value"], qi["not"], qi["first"], qi["last"])
                    q += new_q + " "
        if q:
            self.q = q

        facet_fields = ["collection_title_facet", "msc_facet", "dt", "year_facet"]

        if use_ar_facet:
            facet_fields.append("ar")

        if settings.COLLECTION_PID == "CR":
            facet_fields.append("sites")
        elif settings.COLLECTION_PID == "PCJ":
            facet_fields.append("classname")

        params = {
            "q.op": "AND",
            "sort": sort,
            "facet.field": facet_fields,
            # Decades are built manually because we allow the user to
            # expand a decade and see individual years
            "facet.range": "year_facet",
            "f.year_facet.facet.range.start": 0,
            "f.year_facet.facet.range.end": 3000,
            "f.year_facet.facet.range.gap": 10,
            "facet.mincount": 1,
            "facet.limit": 100,
            "facet.sort": "count",
            # 'fl': '*,score',  # for debugging
            # 'debugQuery': 'true',  # for debugging
            "hl": "true",
            # 'hl.fl': "*", -> by default, the fields of qf are returned
            "hl.snippets": 1,
            "hl.fragsize": 300,
            "hl.simple.pre": "<strong>",
            "hl.simple.post": "</strong>",
            "defType": "edismax",
            # Without a tie, the score is the maximum of the scores of each
            # field; here 0.1 x the score of the other fields is added.
            "tie": 0.1,
            # "df": 'text', Not used with dismax queries
            # We want to retrieve the highlights in both _tex and _html.
            # We need to specify the 2 in qf
            "qf": [
                "au^21",
                "title_tex^13",
                "title_html^13",
                "trans_title_tex^13",
                "trans_title_html^13",
                "abstract_tex^8",
                "trans_abstract_tex^8",
                "kwd^5",
                "trans_kwd^5",
                "collection_title_html^3",
                "collection_title_tex^3",
                "body^2",
                "bibitem",
            ],
            # The "ar" field is multivalued and dedicated to the facets;
            # the "au" field is used for the search and for the display of
            # the results.
        }

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)

        search_results = search_helpers.SearchResults(
            solr_results, self.search_path, self.filters, qt, use_ar_facet
        )

        return search_results
637#####################################################################
638#
639# solrInternalSearchCmd:
640#
641# called from ptf/views.py/book by author
642#
643######################################################################
class solrInternalSearchCmd(solrCmd):
    """Run a search with caller-chosen facets, filters and returned fields.

    ``q`` is a required param. The result is wrapped in
    ``search_helpers.SearchInternalResults`` unless ``create_facets`` is
    falsy, in which case the raw pysolr results are returned.
    """

    def __init__(self, params=None):
        self.q = "*:*"
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = None  # '-score'  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""
        self.facet_fields = []
        self.facet_limit = 100
        self.fl = None
        self.create_facets = True
        # 10/03/2023 - UNUSED
        self.related_articles = False

        # A fresh dict per call replaces the former shared mutable default.
        super().__init__({} if params is None else params)

        self.required_params.extend(["q"])

    def internal_do(self) -> search_helpers.SearchInternalResults | pysolr.Results:
        """Build the Solr parameters from the attributes and run the search."""
        super().internal_do()

        # 10/03/2023 - UNUSED
        if self.site:
            # The filter queries live in self.filters (sent as "fq" below);
            # this line used to append to a non-existent self.fq.
            self.filters.append(f"sites:{self.site}")

        # Facet names exposed to the callers that differ from the Solr fields
        facet_aliases = {
            "firstLetter": "{!ex=firstletter}firstNameFacetLetter",
            "author_facet": "ar",
        }
        the_facet_fields = [facet_aliases.get(field, field) for field in self.facet_fields]
        use_year_facet = "year_facet" in self.facet_fields

        # 10/03/2023 - UNUSED
        if self.related_articles:
            params = {
                "q.op": "OR",
                "hl": "true",
                "hl.fl": "title_tex, trans_title_tex, trans_kwd, kwd",
                "hl.snippets": 1,
                "hl.fragsize": 0,
                "hl.simple.pre": "<strong>",
                "hl.simple.post": "</strong>",
                # "hl.method": "unified"
            }
        else:
            params = {
                "q.op": "AND",
                # 'fl': '*,score',  # for debugging
                # 'debugQuery': 'true',  # for debugging
                "facet.field": the_facet_fields,
                # ["{!ex=firstletter}firstNameFacetLetter", 'year_facet', 'collection_title_facet'],
                "facet.mincount": 1,
                "facet.limit": self.facet_limit,
                "facet.sort": "index",
            }

        # NOTE(review): the nesting of this block relative to the "else"
        # above could not be confirmed; both placements behave the same
        # while related_articles stays False.
        if use_year_facet:
            # Decades are built manually because we allow the user to expand a
            # decade and see individual years
            params.update(
                {
                    "facet.range": "year_facet",
                    "f.year_facet.facet.range.start": 0,
                    "f.year_facet.facet.range.end": 3000,
                    "f.year_facet.facet.range.gap": 10,
                }
            )

        if self.sort:
            params["sort"] = self.sort

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        if self.fl:
            params["fl"] = self.fl

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)
        results = solr_results

        if self.create_facets:
            results = search_helpers.SearchInternalResults(
                solr_results, self.search_path, self.filters, self.facet_fields
            )

        return results
747#####################################################################
748#
749# solrGetDocumentByPidCmd:
750#
751#
752######################################################################
class solrGetDocumentByPidCmd(solrCmd):
    """Fetch the first Solr document matching ``pid`` (required param)."""

    def __init__(self, params={}):
        self.pid = None

        super().__init__(params)

        self.required_params.extend(["pid"])

    def internal_do(self):
        """Return the first document found for the pid, or ``None``."""
        super().internal_do()

        results = solrFactory.get_solr().search("pid:" + self.pid)
        if results is None:
            return None

        docs = results.docs
        return docs[0] if docs else None
class updateResourceSolrCmd(solrAddCmd):
    """Re-index a resource after merging ``params`` into its current Solr document.

    The existing document is looked up by pid, the given params override
    its values, and the result is indexed again by ``solrAddCmd``.
    """

    def __init__(self, params=None):
        self.resource = None

        super().__init__(params)
        # Always keep a dict so that pre_do can unpack it, even when the
        # command was created without params.
        self.params = {} if params is None else params

    def set_resource(self, resource):
        """Remember ``resource`` and use its id and pid for the command."""
        self.resource = resource
        self.id = resource.id
        self.pid = resource.pid

    def pre_do(self):
        """Merge the params into the existing document before it is re-added."""
        doc = solrGetDocumentByPidCmd({"pid": self.pid}).do()
        if doc:
            self.data = {**doc, **self.params}

        # "_version_" comes from the fetched document and is not sent back.
        self.data.pop("_version_", None)

        if "contributors" in self.data:
            # "contributors" is replaced by the author fields derived from it.
            solr_add_contributors_to_data(self.data["contributors"], self.data)
            self.data.pop("contributors")
        # if 'kwd_groups' in self.data:
        #     solr_add_kwd_groups_to_data(self.data['kwd_groups'], self.data)
        #     self.data.pop('kwd_groups')

        # NOTE(review): assumed to run whether or not a document was found;
        # the original nesting could not be confirmed.
        super().pre_do()
def research_more_like_this(article):
    """Query Solr "more like this" for the documents similar to ``article``.

    Returns a dict that always has a ``docs`` list. When the article is
    found in Solr, it also carries ``params`` (the parameters used, sorted
    by key, including ``min_score``), ``numFound``, ``interestingTerms``
    and ``explain``.
    """
    results = {"docs": []}

    doc = solrGetDocumentByPidCmd({"pid": article.pid}).do()
    if not doc:
        return results

    # fields = "au,kwd,trans_kwd,title_tex,trans_title_tex,abstract_tex,trans_abstract_tex,body"
    fields = getattr(settings, "MLT_FIELDS", "all")
    boost = getattr(settings, "MLT_BOOST", "true")
    default_min_score = 80 if boost == "true" else 40
    min_score = getattr(settings, "MLT_MIN_SCORE", default_min_score)

    params = {
        "debugQuery": "true",
        "mlt.interestingTerms": "details",
        "mlt.boost": boost,
        "fl": "*,score",
        "mlt.minwl": 4,
        "mlt.maxwl": 100,
        "mlt.mintf": 2,
        "mlt.mindf": 2,
        "mlt.maxdfpct": 1,
        "mlt.maxqt": 50,
    }
    # params.update({"mlt.qf": "trans_kwd^90 title_tex^80 body^1.7"})

    pid_prefix = article.pid.split("_")[0]
    if pid_prefix[:2] == "CR":
        # search suggested articles in all CR
        params["fq"] = r"pid:/CR.*/"
    else:
        params["fq"] = f"pid:/{pid_prefix}.*/"

    query = f'id:{doc["id"]}'
    similar = solrFactory.get_solr().more_like_this(q=query, mltfl=fields, **params)

    # The query, fields and threshold are added for the returned report
    # only, after the request has been sent.
    params["q"] = query
    params["mlt.fl"] = fields
    params["min_score"] = min_score

    results["params"] = dict(sorted(params.items()))
    results["docs"] = similar.docs
    results["numFound"] = similar.raw_response["response"]["numFound"]
    results["interestingTerms"] = similar.raw_response["interestingTerms"]
    results["explain"] = similar.debug["explain"]
    return results
def is_excluded_suggested_article(title):
    """Tell whether ``title`` must be left out of the suggested articles.

    A title is excluded when it starts with one of
    ``settings.MLT_EXCLUDED_TITLES_START`` or is contained in
    ``settings.MLT_EXCLUDED_TITLES``; a missing setting counts as empty.
    """
    excluded_titles = getattr(settings, "MLT_EXCLUDED_TITLES", [])
    excluded_prefixes = getattr(settings, "MLT_EXCLUDED_TITLES_START", [])

    if title.startswith(tuple(excluded_prefixes)):
        return True
    return title in excluded_titles
def auto_suggest_doi(suggest, article, results=None):
    """Fill ``suggest.doi_list`` with the DOIs of the best similar articles.

    Args:
        suggest: object with an ``automatic_list`` flag; its ``doi_list``
            is overwritten only when the flag is truthy.
        article: article whose similar documents are searched when
            ``results`` is not supplied.
        results: optional result dict with the same shape as the one
            returned by ``research_more_like_this``.

    Returns:
        The results dict that was used.
    """
    if not results:
        results = research_more_like_this(article)

    if results and suggest.automatic_list:
        doi_list = []
        # Only the first three documents are considered.
        for item in results["docs"][:3]:
            if item["score"] <= results["params"]["min_score"]:
                continue

            doi = item.get("doi", "")
            title = item.get("title_tex", "")
            # Documents without a DOI are skipped; they used to add an
            # empty line to the list.
            if doi and doi not in doi_list and not is_excluded_suggested_article(title):
                doi_list.append(doi)
        suggest.doi_list = "\n".join(doi_list)

    return results