Coverage for src/ptf/solr/search_helpers.py: 90%
422 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 09:56 +0000
1import datetime
2import re
3import string
4from urllib.parse import quote
5from urllib.parse import quote_plus
6from urllib.parse import urlparse
8from pysolr import Results
10from django.conf import settings
11from django.http import Http404
12from django.http import QueryDict
13from django.urls import reverse
14from django.utils import translation
16from ptf.display import resolver
17from ptf.site_register import SITE_REGISTER
20######################################################################
21# CLEANSEARCHURL
22######################################################################
class CleanSearchURL:
    """
    Build and parse "clean" (pretty) search URLs such as
    /search/*-"ma collection"-qp

    The first part of the URL is the path. It is followed by the criteria,
    and the last part is the encoding string: one letter per criterion, in
    the same order. Criteria and encoding are split by
    CleanSearchURL.separator. A criterion that contains the separator or a
    double quote is wrapped in double quotes.
    """

    separator = "-"

    # encoding letter -> (query-string template, pattern the criterion must match).
    # NOTE: patterns are applied with re.match (anchored at the start only).
    _DECODE_RULES = {
        "q": ("q{}={}&qt{}=all", re.compile(r".*")),
        "a": ("q{}={}&qt{}=author", re.compile(r"\D*")),
        "b": ("q{}={}&qt{}=title", re.compile(r".*")),
        "c": ("q{}={}&qt{}=author_ref", re.compile(r"\D*")),
        "d": (
            "q-f-{}={}&q-l-{}={}&qt{}=date",
            re.compile(r"\[(\d{4}|\*|) TO (\d{4}|\*|)\]"),
        ),
        "f": ("q{}={}&qt{}=references", re.compile(r".*")),
        "g": ("q{}={}&qt{}=body", re.compile(".*")),
        "k": ("q{}={}&qt{}=kwd", re.compile(".*")),
        "m": ("f=msc_facet:{}", re.compile(r".*")),
        "n": ("f=ar:{}", re.compile(r"\D*")),
        "o": ("f=year_facet:{}", re.compile(r"\[\d{4} TO \d{4}\]")),
        "p": ("f=collection_title_facet:{}", re.compile(r".*")),
        # FIXME: drop the quoted variant for good once it is certain that the
        # quotes around the first letter are not needed.
        "r": ("f={{!tag=firstletter}}firstNameFacetLetter:{}", re.compile(r"[A-Z]{1}")),
        "s": ("f=dt:{}", re.compile(r".*")),
        "t": ("page={}", re.compile(r"\d*")),
        "u": ("f=classname:{}", re.compile(r".*")),
        "x": ("q{}={}&qt{}=abstract", re.compile(r".*")),
        "y": ("f=year_facet:{}", re.compile(r"\"\d{4}\"")),
    }

    # Letters that describe a free-text search field (q<i> / qt<i> pair).
    _TEXT_KEYS = frozenset("qabcfgkx")

    # qt<i> value of the search form -> encoding letter (date is handled apart).
    _QT_CODES = {
        "all": "q",
        "author": "a",
        "author_ref": "c",
        "title": "b",
        "references": "f",
        "body": "g",
        "kwd": "k",
        "abstract": "x",
    }

    # Solr filter field -> encoding letter (year_facet is handled apart).
    _FILTER_CODES = {
        "collection_title_facet": "p",
        "ar": "n",
        "{!tag=firstletter}firstNameFacetLetter": "r",
        "dt": "s",
        "classname": "u",  # used for article types
        "msc_facet": "m",
    }

    def __init__(self, base):
        self._base = base
        self._criteria = []
        self._encoding = []

    def _check_in_sync(self):
        """Raise RuntimeError if the criteria and encoding lists have diverged."""
        if len(self._criteria) != len(self._encoding):
            # The original code used a bare `raise`, which also ends up as a
            # RuntimeError; the type is kept and a message is added.
            raise RuntimeError("CleanSearchURL: criteria and encoding are out of sync")

    def append(self, criteria, type):
        """Add a criterion together with its one-letter encoding."""
        self._check_in_sync()
        self._criteria.append(criteria)
        self._encoding.append(type)

    def remove(self, criteria, type):
        """
        Remove a criterion and its one-letter encoding.

        9/03/2023 - UNUSED.
        The class is used as an object only once in `pretty_search` method (helpers.py).
        """
        self._check_in_sync()
        self._criteria.remove(criteria)
        self._encoding.remove(type)

    @staticmethod
    def _split_criteria(clean_url):
        """
        Split clean_url on the separator, keeping double-quoted criteria whole.

        The last item of the returned list is the encoding string.
        May raise IndexError when the text ends with a double quote; decode()
        turns that into a 404.
        """
        sep = CleanSearchURL.separator
        criteria = []
        chunk = ""
        in_quote = False
        for i, c in enumerate(clean_url):
            if in_quote or c != sep:
                chunk += c

            if c == '"' and (i == 0 or clean_url[i - 1] == sep) and not in_quote:
                # A quoted criterion starts here.
                in_quote = True
            elif c == '"' and clean_url[i + 1] == sep and in_quote:
                # A quoted criterion ends here.
                criteria.append(chunk)
                in_quote = False
                chunk = ""
            elif c == sep and not in_quote and clean_url[i - 1] != '"':
                # End of an unquoted criterion (a quoted one was already stored).
                criteria.append(chunk)
                chunk = ""
        criteria.append(chunk)
        return criteria

    @staticmethod
    def decode(clean_url: str, path="") -> tuple[str, QueryDict]:
        """
        Decode a pretty search URL, encoded like:
            search /1stterm-2ndterm - 3 - Name - 1986 -abpfg
            path   all + author + page + author facet + date facet + encoding

        One letter of the encoding describes one criterion. For the text
        fields an UPPERCASE letter means the criterion is negated (NOT).
            "q": qt = all
            "a": qt = author
            "b": qt = title
            "c": qt = author_ref
            "d": first date / last date of the search form
            "f": references (bibliography)
            "g": body (full text)
            "k": qt = kwd (keywords)
            "x": qt = abstract
            "m": msc facet
            "n": author facet
            "o": year range facet
            "p": collection facet
            "r": first letter
            "s": document type facet
            "t": page
            "u": classname (article type)
            "y": year facet

        Empty criteria and criteria that do not match the pattern of their
        letter are skipped.

        @param clean_url: criteria and encoding, split by CleanSearchURL.separator
        @param path: path of the search; returned untouched (/search, /thesis, /series...)
        @return path, QueryDict: the QueryDict looks like qt0: all, q0: 'mysearch'...
        @raise Http404: if clean_url cannot be parsed (unknown letter, bad quoting...)
        """
        try:
            criteria = CleanSearchURL._split_criteria(clean_url)
            # The encoding is the last item.
            encoding = criteria.pop()

            q_index = 0
            params = []
            for encoding_key, criteria_value in zip(encoding, criteria):
                if criteria_value == "":
                    continue
                letter = encoding_key.lower()
                template, pattern = CleanSearchURL._DECODE_RULES[letter]
                if not pattern.match(criteria_value):
                    continue

                if letter == "d":
                    # Date interval: "[first TO last]"
                    begin, end = criteria_value.strip("[]").split(" TO ")
                    params.append(template.format(q_index, begin, q_index, end, q_index))
                elif letter in CleanSearchURL._TEXT_KEYS:
                    # The value must be URL-encoded to go through QueryDict.
                    params.append(
                        template.format(q_index, quote_plus(criteria_value), q_index)
                    )
                else:
                    # Facets and page do not consume a search field index.
                    params.append(template.format(quote_plus(criteria_value)))
                    continue

                if letter != encoding_key:
                    # Uppercase letter: the criterion is negated.
                    params.append(f"not{q_index}=on")
                # Every search field (date included) owns its index, as in encode().
                q_index += 1

            querydict = QueryDict("&".join(params).encode("utf-8"))
            return path, querydict
        except Exception as err:
            raise Http404() from err

    @staticmethod
    def encode(dict: QueryDict, path="") -> str:
        """
        Encode a search request (QueryDict) into a clean URL.

        Search fields (qt<i>/q<i>/not<i>) come first, then the filters ("f"),
        then the page. Unknown field types and unknown or malformed filters
        are ignored.

        @param dict: request of the search form; only get() and getlist() are used
        @param path: path of the search
        @return: `path` untouched if there is no criterion, otherwise
                 "/<path>?<clean url>" (or the clean URL alone if path is empty)
        """
        sep = CleanSearchURL.separator
        criteria = []
        encoding = []

        # 1. Search fields
        i = 0
        qti = dict.get(f"qt{i}", None)
        while qti:
            code = None
            if qti == "date":
                first = dict.get(f"q-f-{i}", "*")
                last = dict.get(f"q-l-{i}", "*")
                criteria.append(f"[{first} TO {last}]")
                code = "d"
            elif qti in CleanSearchURL._QT_CODES:
                value = dict.get(f"q{i}", None)
                # A field type without a value is encoded as an empty criterion.
                criteria.append("" if value is None else value)
                code = CleanSearchURL._QT_CODES[qti]

            if code is not None:
                # NOT only applies to the criterion of the same index.
                if dict.get(f"not{i}", None) == "on":
                    code = code.upper()
                encoding.append(code)

            i += 1
            qti = dict.get(f"qt{i}", None)

        # 2. Filters, formatted as "<field>:<value>"
        for item in dict.getlist("f"):
            key, found, value = item.partition(":")
            if not found:
                continue
            if key == "year_facet":
                # "[1990 TO 1999]" is a year range, '"1995"' a single year.
                code = "o" if value.startswith("[") else "y"
            else:
                code = CleanSearchURL._FILTER_CODES.get(key)
            if code is None:
                continue
            criteria.append(value)
            encoding.append(code)

        # 3. Pagination
        page = dict.get("page")
        if page:
            criteria.append(page)
            encoding.append("t")

        if not criteria:
            return path

        # Protect with quotes the criteria that would confuse decode().
        for index, criterion in enumerate(criteria):
            if criterion and criterion[0] == '"' and criterion[-1] == '"':
                continue  # already protected
            if sep in criterion or '"' in criterion:
                criteria[index] = f'"{criterion}"'

        clean_url = (
            sep.join(quote(item, "") for item in criteria) + sep + "".join(encoding)
        )
        path = path.strip("/")
        if path:
            return "/" + path + "?" + clean_url
        return clean_url

    def to_href(self):
        """Return the clean URL built from the base and the appended criteria."""
        encoded_criteria = self.separator.join(quote(item, "") for item in self._criteria)
        return self._base + "/" + encoded_criteria + self.separator + "".join(self._encoding)
320######################################################################
321# FACETS & SEARCH RESULTS
322######################################################################
class Facet:
    """
    Facet: a filter that you can select to narrow your search
    Example: "Journal article(25)" is a filter (of the colid_facets category)

    properties:
        name        Ex: "Journal article"
        count       Ex: 25
        active      The state passed by the caller.
                    Ex: "active", "not-active" or "disabled"
        sub_facets  Facets nested under this one (Ex: the years of a year range)
        href        The url to set on the filter.
                    It is the clean search URL (see CleanSearchURL.encode) of
                    `path` with `filters` added to its query string.

    Facets are returned in the SearchResultsGenerator
    """

    def __init__(
        self,
        name: str,
        count: int,
        state: str,
        filters=None,
        path="/search/",
        sub_facets=None,
    ):
        """
        @param filters: iterable of Solr filters ("field:value") to put in the
                        href; None or empty means no filter
        @param path: search URL, usually /search/?q0=test&qt0=all
        @param sub_facets: list of nested Facet; a new list is created if None
        """
        self.name = name
        self.count = count
        self.active = state
        # No mutable default argument: each Facet owns its list.
        self.sub_facets = [] if sub_facets is None else sub_facets

        if filters:
            encoded_filters = "&f=".join(quote_plus(x) for x in filters)
            # Start the query string if path does not have one yet, otherwise
            # the filters would end up inside the URL path.
            joiner = "&" if "?" in path else "?"
            query = f"{path}{joiner}f={encoded_filters}"
        else:
            query = path

        url = urlparse(query)
        params = QueryDict(url.query.encode("utf-8"), True, "utf-8")
        href = CleanSearchURL.encode(params, url.path)
        if hasattr(settings, "SITE_URL_PREFIX"):
            href = f"/{settings.SITE_URL_PREFIX}" + href
        self.href = href
def create_facets_in_category(
    solr_results: Results,
    category: str,
    active_filters: set[str],
    path: str,
    sort=False,
    reverse=False,
) -> list[Facet]:
    """
    Create the Facet objects of one Solr facet field.

    Each facet links to the search with its own filter toggled: removed if it
    is active, added otherwise.

    @param solr_results: Solr results; only .facets["facet_fields"] is read
    @param category: name of the facet field (Ex: "dt", "ar", "year_facet")
    @param active_filters: filters currently selected ('field:"value"'); not modified
    @param path: search URL given to every Facet
    @param sort: sort the facets by name
    @param reverse: with sort, sort in descending order
    @return: list of Facet, empty if Solr did not return the category
    """
    facet_fields = solr_results.facets["facet_fields"]
    if category not in facet_fields:
        return []

    # Solr returns the facets in a flat list: ["1878", 1, "1879", 0, "1912", 3]
    # Pair the names (even positions) with the counts (odd positions).
    flat = facet_fields[category]
    solr_facets = list(zip(flat[0::2], flat[1::2]))

    if sort:
        solr_facets = sorted(solr_facets, key=lambda x: x[0], reverse=reverse)

    results = []
    active_filters = active_filters.copy()

    if category == "year_facet":
        # Selecting a year facet clears the active year range facet (if any)
        year_ranges = [v for v in active_filters if v.startswith("year_facet:[")]
        if year_ranges:
            active_filters.remove(year_ranges[0])

    ar_active_filter = None
    if category == "ar":
        # Prefix test: a plain substring test would also catch other filters
        # whose value contains "ar:".
        author_filters = [v for v in active_filters if v.startswith("ar:")]
        if author_filters:
            ar_active_filter = author_filters[0]

    for facet_name, count in solr_facets:
        this_filters = active_filters.copy()
        v = '{}:"{}"'.format(category, facet_name.replace('"', '\\"'))
        if category == "sites":
            # Display the name of the site; keep the raw id if it is not registered.
            facet_name = next(
                (
                    SITE_REGISTER[key]["name"]
                    for key in SITE_REGISTER
                    if str(SITE_REGISTER[key]["site_id"]) == facet_name
                ),
                facet_name,
            )
        if v in active_filters:
            this_filters.remove(v)
            results.append(Facet(facet_name, count, "active", this_filters, path))
        else:
            # Selecting several author facets is not allowed
            if ar_active_filter is not None:
                this_filters.remove(ar_active_filter)
            this_filters.add(v)
            results.append(Facet(facet_name, count, "not-active", this_filters, path))

    return results
def create_year_range_facets(
    solr_results: Results, year_facets: list[Facet], active_filters: set[str], path: str
) -> list[Facet]:
    """
    Create the year range facets, most recent range first.

    The year facets become the sub facets of the range that contains them.
    year_facets is walked once, in the given order, so it is expected to be
    sorted from the most recent year to the oldest (as create_facets does).

    @param solr_results: Solr results; only .facets["facet_ranges"]["year_facet"] is read
    @param year_facets: Facet of each single year
    @param active_filters: filters currently selected; not modified
    @param path: search URL given to every Facet
    @return: list of Facet, one per range returned by Solr
    """
    gap = solr_results.facets["facet_ranges"]["year_facet"]["gap"]
    flat = solr_results.facets["facet_ranges"]["year_facet"]["counts"]
    # Flat list [start, count, start, count, ...] => [(start, count), ...]
    ranges = sorted(zip(flat[0::2], flat[1::2]), key=lambda pair: pair[0], reverse=True)

    current_year = datetime.datetime.now().year
    range_facets = []
    position = 0  # index of the next year facet that is not in a range yet
    total = len(year_facets)

    for start, count in ranges:
        first_year = int(start)
        last_year = int(start) + gap - 1
        # A range never goes beyond the current year.
        end = str(current_year) if last_year > current_year else str(last_year)

        # Collect the year facets that are inside this range.
        sub_year_facets = []
        this_filters = active_filters.copy()
        while position < total and int(year_facets[position].name) >= first_year:
            year_facet = year_facets[position]
            sub_year_facets.append(year_facet)

            # Clicking on a year range facet clears the active year facet (if any)
            year_filter = 'year_facet:"' + year_facet.name + '"'
            if year_filter in active_filters:
                this_filters.remove(year_filter)

            position += 1

        label = start + "-" + str(end)
        range_filter = "year_facet:[" + start + " TO " + end + "]"

        if range_filter in active_filters:
            this_filters.remove(range_filter)
            state = "active"
        else:
            this_filters.add(range_filter)
            state = "not-active"
        range_facets.append(Facet(label, count, state, this_filters, path, sub_year_facets))

    return range_facets
def create_facets(
    solr_results: Results, path: str, filters: list[str], use_ar_facet=True
) -> dict[str, list[Facet]]:
    """
    Create all the facets of a search result page.

    @param solr_results: Solr results
    @param path: search URL given to every Facet
    @param filters: filters currently selected
    @param use_ar_facet: if False, the author facets are an empty list
    @return: dict of facet lists, by facet group name
    """
    active_filters = set(filters)

    def in_category(category, **options):
        return create_facets_in_category(
            solr_results, category, active_filters, path, **options
        )

    atype_facets = in_category("classname")
    author_facets = in_category("ar") if use_ar_facet else []
    dt_facets = in_category("dt")
    msc_facets = in_category("msc_facet")
    collection_facets = in_category("collection_title_facet")
    sites_facets = in_category("sites")
    year_facets = in_category("year_facet", sort=True, reverse=True)

    only_one_active_year = len(year_facets) == 1 and year_facets[0].active == "active"
    if only_one_active_year:
        # The selected year is displayed instead of the year ranges.
        year_range_facets = year_facets
    else:
        year_range_facets = create_year_range_facets(
            solr_results, year_facets, active_filters, path
        )

    facets = {}
    facets["author_facets"] = author_facets
    facets["msc_facets"] = msc_facets
    facets["year_range_facets"] = year_range_facets
    facets["dt_facets"] = dt_facets
    facets["atype_facets"] = atype_facets
    facets["collection_facets"] = collection_facets
    facets["sites_facets"] = sites_facets
    return facets
class SearchResults:
    """
    Search results.
    Hold data returned by Solr
    Intermediary between solr_results and the Django template to display the results

    attributes:
        facets   dict of Facet lists (see create_facets)
        hits     number of results, copied from solr_results.hits
        docs     solr_results.docs, each doc enriched for the display
        filters  the filters joined with "&f="
    """

    # HTML elements that never have an end tag.
    _VOID_ELEMENTS = frozenset(
        {
            "area",
            "base",
            "br",
            "col",
            "embed",
            "hr",
            "img",
            "input",
            "link",
            "meta",
            "source",
            "track",
            "wbr",
        }
    )

    def fix_truncated_value(self, value: str):
        """
        Highlighting may produce an HTML string truncated at the end.
        To display the search keywords in bold, we add <strong> around them.
        But we ask the template to display the highlight as |safe such that
        unclosed HTML tags will damage the HTML page layout.
        => fix_truncated_value attempts to add the missing HTML end tags.

        Quotes are only meaningful inside a tag (attribute values): quotes and
        apostrophes of the text are ignored. Void elements (<br>...),
        self-closing tags, comments and declarations are not closed. If the
        value is cut inside a tag, the tag is terminated first.

        9/03/2023 - This is a best effort scanner, not an HTML parser.

        @param value: HTML fragment, possibly truncated
        @return: value followed by the missing quote / ">" / end tags
        """
        open_tags = []  # names of the elements still open, innermost last
        quote = ""  # quote character of the attribute value being read
        in_tag = False  # True between a tag name and its closing ">"
        just_opened = False  # the current tag pushed its name on open_tags
        size = len(value)
        i = 0

        while i < size:
            char = value[i]

            if quote:
                # Inside an attribute value: only the closing quote matters.
                if char == quote:
                    quote = ""
                i += 1
                continue

            if in_tag and char in "\"'":
                quote = char
            elif in_tag and char == ">":
                if just_opened and value[i - 1] == "/":
                    # Self-closing tag: <foo/>
                    open_tags.pop()
                in_tag = False
                just_opened = False
            elif char == "<":
                i += 1
                is_end_tag = i < size and value[i] == "/"
                if is_end_tag:
                    i += 1
                name_start = i
                while i < size and value[i] not in " \t\r\n>/<":
                    i += 1
                name = value[name_start:i]

                in_tag = bool(name)
                just_opened = False
                if name and is_end_tag:
                    if open_tags and open_tags[-1] == name:
                        open_tags.pop()
                elif (
                    name
                    and name[0] not in "!?"
                    and name.lower() not in SearchResults._VOID_ELEMENTS
                ):
                    open_tags.append(name)
                    just_opened = True
                # Do not skip the character that ended the name: it may be ">".
                continue

            i += 1

        if quote:
            value += quote
        if in_tag:
            value += ">"
        for name in reversed(open_tags):
            value += "</" + name + ">"

        return value

    # def __init__(self, solr_results, path, filters, sort): -> if the sort is
    # activated
    def __init__(
        self,
        solr_results: Results,
        path: str,
        filters: list[str],
        qt: list[str],
        use_ar_facet=True,
    ):
        """
        @param solr_results: Solr results (hits, docs, facets, highlighting)
        @param path: search URL given to every Facet
        @param filters: filters currently selected
        @param qt: types of the search fields (Ex: "all", "kwd", "references");
                   not modified
        @param use_ar_facet: if False, no author facet is created
        """
        self.facets = create_facets(solr_results, path, filters, use_ar_facet)
        self.hits = solr_results.hits
        self.docs = solr_results.docs

        # Highlight fields, by order of preference: the first one that matches
        # is displayed.
        cur_language = translation.get_language()
        if cur_language != "fr":
            preferred_highlight_keywords = [
                "trans_abstract_tex",
                "abstract_tex",
                "trans_kwd",
                "kwd",
                "body",
                "bibitem",
            ]
        else:
            preferred_highlight_keywords = [
                "abstract_tex",
                "trans_abstract_tex",
                "kwd",
                "trans_kwd",
                "body",
                "bibitem",
            ]

        # References is bibitem
        qt = [s.replace("references", "bibitem") for s in qt]

        # NOTE(review): these are substring tests, so "kwd" also matches
        # "trans_kwd" and the elif branches cannot be reached. Behaviour kept
        # as is; confirm the intent.
        has_kwd = any("kwd" in s for s in qt)
        has_trans_kwd = any("trans_kwd" in s for s in qt)
        if has_kwd and not has_trans_kwd:
            qt.append("trans_kwd")
        elif has_trans_kwd and not has_kwd:
            qt.append("kwd")

        has_abstract = any("abstract_tex" in s for s in qt)
        has_trans_abstract = any("trans_abstract_tex" in s for s in qt)
        if has_abstract and not has_trans_abstract:
            qt.append("trans_abstract_tex")
        elif has_trans_abstract and not has_abstract:
            qt.append("abstract_tex")

        # We get the translation based on the current language
        # (the translation mechanism is not called on a specific language)
        abstract_text = translation.gettext("Résumé")
        reference_text = translation.gettext("Bibliographie")
        keywords_text = translation.gettext("Mots clés")
        fulltext_text = translation.gettext("Plein texte")

        field_labels = {
            "abstract_tex": abstract_text,
            "trans_abstract_tex": abstract_text,
            "kwd": keywords_text,
            "trans_kwd": keywords_text,
            "body": fulltext_text,
            "bibitem": reference_text,
        }

        for index, doc in enumerate(self.docs):
            id_doc = doc["id"]
            doc["embargo"] = resolver.embargo(doc["wall"], doc["year"])
            hl = solr_results.highlighting[id_doc]

            # Put the <strong> of the highlight in the displayed fields.
            for key in ("au", "year"):
                if key in hl:
                    highlighted = hl[key][0]
                    plain = highlighted.replace("<strong>", "").replace("</strong>", "")
                    value = doc[key]
                    if value.find(plain) > -1:
                        value = value.replace(plain, highlighted)
                    doc[key] = value

            # The collection titles are lists: only the first title is kept.
            for key in ("collection_title_tex", "collection_title_html"):
                value = doc[key][0]
                if key in hl:
                    highlighted = hl[key][0]
                    plain = highlighted.replace("<strong>", "").replace("</strong>", "")
                    if value.find(plain) > -1:
                        value = value.replace(plain, highlighted)
                doc[key] = value

            # Excerpts of the first preferred field that was searched and matched.
            doc["highlighting"] = {"value": ""}
            for key in preferred_highlight_keywords:
                if key in hl and ("all" in qt or key in qt):
                    doc["highlighting"]["field"] = field_labels[key]
                    for value in hl[key]:
                        if key == "bibitem":
                            value = self.fix_truncated_value(value)
                        doc["highlighting"]["value"] = (
                            doc["highlighting"]["value"] + "... " + value + " ...<br>"
                        )
                    break
            # TODO: the highlight on bibitem (or even on the full text) is only
            # wanted if nothing else matches

            if settings.SITE_NAME == "cr" and "sites" in doc and doc["sites"]:
                # The links go to the site of the document.
                site_id = doc["sites"][0]
                site_domain = [
                    SITE_REGISTER[key]["site_domain"]
                    for key in SITE_REGISTER
                    if SITE_REGISTER[key]["site_id"] == site_id
                ][0]
                prefix = site_domain.split("/")[1]
                if "doi" in doc:
                    url = reverse("article", kwargs={"aid": doc["doi"]})
                else:
                    url = reverse("item_id", kwargs={"pid": doc["pid"]})
                doc["item_url"] = "/" + prefix + url
                if not doc["pdf"].startswith("/" + prefix):
                    doc["pdf"] = "/" + prefix + doc["pdf"]
                    if "tex" in doc:
                        doc["tex"] = "/" + prefix + doc["tex"]
            elif hasattr(settings, "SITE_URL_PREFIX"):
                if not doc["pdf"].startswith("/" + settings.SITE_URL_PREFIX):
                    doc["pdf"] = "/" + settings.SITE_URL_PREFIX + doc["pdf"]
                    if "tex" in doc:
                        doc["tex"] = "/" + settings.SITE_URL_PREFIX + doc["tex"]

            self.docs[index] = doc

        self.filters = "&f=".join(filters)
        # self.sort = sort -> if the sort is activated
class SearchInternalResults:
    """
    Search results for sorted Books.
    Hold data returned by Solr
    Intermediary between solr_results and the Django template to display the results
    """

    # def __init__(self, solr_results, path, filters, sort): -> if the sort is
    # activated
    def __init__(
        self, solr_results: Results, path: str, filters: list[str], facet_fields: list[str]
    ):
        """
        @param solr_results: Solr results (hits, docs, facets)
        @param path: search URL given to every Facet
        @param filters: filters currently selected
        @param facet_fields: facet groups to create; a group that is not
                             requested is None in self.facets
        """
        letter_prefix = "{!tag=firstletter}firstNameFacetLetter:"
        remaining_filters = set(filters)

        collection_facets = None
        if "collection_title_facet" in facet_fields:
            collection_facets = create_facets_in_category(
                solr_results, "collection_title_facet", remaining_filters, path
            )

        author_facets = None
        if "author_facet" in facet_fields:
            author_facets = create_facets_in_category(
                solr_results, "ar", remaining_filters, path
            )

        year_range_facets = None
        if "year_facet" in facet_fields:
            year_facets = create_facets_in_category(
                solr_results, "year_facet", remaining_filters, path, sort=True, reverse=True
            )
            if len(year_facets) == 1 and year_facets[0].active == "active":
                year_range_facets = year_facets
            else:
                year_range_facets = create_year_range_facets(
                    solr_results, year_facets, remaining_filters, path
                )

        letter_facets = None
        if "firstLetter" in facet_fields:
            # The first letter filter is taken out of the filters: a letter
            # facet replaces the selected letter instead of adding one.
            selected_letter_filter = ""
            for one_filter in filters:
                if one_filter.startswith(letter_prefix):
                    remaining_filters.remove(one_filter)
                    selected_letter_filter = one_filter

            flat = solr_results.facets["facet_fields"]["firstNameFacetLetter"]
            letter_counts = dict(zip(flat[0::2], flat[1::2]))

            letter_facets = []
            all_state = "active"
            for letter in string.ascii_uppercase:
                if letter not in letter_counts:
                    # No result for this letter
                    letter_facets.append(Facet(letter, 0, "disabled", "", path))
                    continue

                letter_filter = f"{{!tag=firstletter}}firstNameFacetLetter:{letter}"
                if letter_filter == selected_letter_filter:
                    # The letter has results and its filter is selected
                    letter_facets.append(
                        Facet(letter, letter_counts[letter], "active", remaining_filters, path)
                    )
                    all_state = "not-active"
                else:
                    with_letter = remaining_filters.copy()
                    with_letter.add(letter_filter)
                    letter_facets.append(
                        Facet(letter, letter_counts[letter], "not-active", with_letter, path)
                    )
            # "All" is active as long as no letter is selected.
            letter_facets.append(Facet("All", 1, all_state, remaining_filters, path))

        self.facets = {
            "year_range_facets": year_range_facets,
            "letter_facets": letter_facets,
            "collection_title_facets": collection_facets,
            "author_facets": author_facets,
        }

        self.hits = solr_results.hits
        self.docs = solr_results.docs
        self.filters = "&f=".join(filters)
        # self.sort = sort -> if the sort is activated

        for position, doc in enumerate(self.docs):
            doc["embargo"] = resolver.embargo(doc["wall"], doc["year"])
            self.docs[position] = doc