Coverage for src/ptf/solr/search

1import datetime

2import re

3import string

4from urllib.parse import quote

5from urllib.parse import quote_plus

6from urllib.parse import urlparse

8from pysolr import Results

10from django.conf import settings

11from django.http import Http404

12from django.http import QueryDict

13from django.urls import reverse

14from django.utils import translation

16from ptf.display import resolver

17from ptf.site_register import SITE_REGISTER

20######################################################################

21# CLEANSEARCHURL

22######################################################################

class CleanSearchURL:
    """
    Pretty ("clean") search URL, for example ``/search/*-"ma collection"-qp``.

    The first part of the URL is the path. It is followed by the search
    criteria and, as the very last segment, by the encoding: one letter per
    criterion telling how that criterion must be interpreted.
    Segments are separated by ``CleanSearchURL.separator``.
    """

    separator = "-"

    # Encoding letter -> (query-string template, validation regexp); used by decode().
    _DECODE_RULES = {
        "q": ("q{}={}&qt{}=all", r".*"),
        "a": ("q{}={}&qt{}=author", r"\D*"),
        "b": ("q{}={}&qt{}=title", r".*"),
        "c": ("q{}={}&qt{}=author_ref", r"\D*"),
        "d": ("q-f-{}={}&q-l-{}={}&qt{}=date", r"\[(\d{4}|\*|) TO (\d{4}|\*|)\]"),
        "f": ("q{}={}&qt{}=references", r".*"),
        "g": ("q{}={}&qt{}=body", ".*"),
        "k": ("q{}={}&qt{}=kwd", ".*"),
        "m": ("f=msc_facet:{}", r".*"),
        "n": ("f=ar:{}", r"\D*"),
        "o": ("f=year_facet:{}", r"\[\d{4} TO \d{4}\]"),
        "p": ("f=collection_title_facet:{}", r".*"),
        # FIXME: drop this note once it is certain that the double quotes
        # around the first letter are not needed.
        "r": ("f={{!tag=firstletter}}firstNameFacetLetter:{}", r"[A-Z]{1}"),
        "s": ("f=dt:{}", r".*"),
        "t": ("page={}", r"\d*"),
        "u": ("f=classname:{}", r".*"),
        "x": ("q{}={}&qt{}=abstract", r".*"),
        "y": ("f=year_facet:{}", r"\"\d{4}\""),
    }

    # Letters that stand for a free-text criterion (q{i} / qt{i} pair).
    _TEXT_KEYS = frozenset("qabcfkgx")

    # Search-form field type (qt{i}) -> encoding letter; used by encode().
    _FIELD_LETTERS = {
        "all": "q",
        "author": "a",
        "author_ref": "c",
        "title": "b",
        "date": "d",
        "references": "f",
        "body": "g",
        "kwd": "k",
        "abstract": "x",
    }

    # Filter key (left of the first ":") -> encoding letter; used by encode().
    # "year_facet" is handled separately because it maps to "o" or "y".
    _FILTER_LETTERS = {
        "collection_title_facet": "p",
        "ar": "n",
        "{!tag=firstletter}firstNameFacetLetter": "r",
        "dt": "s",
        "classname": "u",  # used for article types
        "msc_facet": "m",
    }

    def __init__(self, base):
        self._base = base
        self._criteria = []
        self._encoding = []

    def _check_in_sync(self):
        """Raise RuntimeError if the criteria and encoding lists differ in length."""
        if len(self._criteria) != len(self._encoding):
            # The original code used a bare ``raise`` here, which Python turns
            # into a RuntimeError without any useful message.
            raise RuntimeError("CleanSearchURL: criteria and encoding are out of sync")

    def append(self, criteria, type):
        """Add a criterion together with its one-letter encoding."""
        self._check_in_sync()
        self._criteria.append(criteria)
        self._encoding.append(type)

    def remove(self, criteria, type):
        """
        Remove a criterion and its one-letter encoding.

        9/03/2023 - UNUSED.
        The class is used as an object only once in `pretty_search` method (helpers.py).
        """
        self._check_in_sync()
        self._criteria.remove(criteria)
        self._encoding.remove(type)

    @staticmethod
    def decode(clean_url: str, path="") -> "tuple[str, QueryDict]":
        """
        Decode a pretty search URL such as::

            1st term-2nd term-3-Name-1986-abpfg

        into the query parameters understood by the search code.

        The last segment holds one letter per criterion. An upper-case letter
        on a text criterion means the criterion is negated (NOT search).

            "q": qt = all
            "a": qt = author
            "b": qt = title
            "c": qt = author_ref
            "d": qt = date (first year / last year of the search form)
            "f": qt = references
            "g": qt = body (full text)
            "k": qt = kwd
            "x": qt = abstract
            "m": msc_facet filter
            "n": ar (author) filter
            "o": year_facet filter, year range
            "p": collection_title_facet filter
            "r": first-letter filter
            "s": dt (document type) filter
            "t": page
            "u": classname (article type) filter
            "y": year_facet filter, single year

        @param clean_url: criteria and encoding separated by CleanSearchURL.separator
        @param path: path of the search (/search, /thesis, /series, ...); returned untouched
        @return: (path, QueryDict) where the QueryDict looks like qt0=all, q0=..., f=...
        @raise Http404: on any failure while parsing (unknown letter, malformed URL, ...)
        """
        sep = CleanSearchURL.separator
        try:
            # Split clean_url on the separator, keeping "..."-protected
            # criteria (which may contain the separator) in one piece.
            segments = []
            current = ""
            in_quote = False
            for i, c in enumerate(clean_url):
                if in_quote or c != sep:
                    current += c

                if c == '"' and (i == 0 or clean_url[i - 1] == sep) and not in_quote:
                    # Start of a quoted criterion
                    in_quote = True
                elif c == '"' and clean_url[i + 1] == sep and in_quote:
                    # End of a quoted criterion. A quote that is the very last
                    # character raises IndexError here, reported as a 404 below.
                    segments.append(current)
                    in_quote = False
                    current = ""
                elif c == sep and not in_quote and clean_url[i - 1] != '"':
                    # End of an unquoted criterion
                    segments.append(current)
                    current = ""
            segments.append(current)

            # The encoding is the last segment, one letter per criterion
            *criteria, encoding = segments

            parts = []
            q_index = 0
            for encoding_key, criteria_value in zip(encoding, criteria):
                if criteria_value == "":
                    continue
                key = encoding_key.lower()
                template, pattern = CleanSearchURL._DECODE_RULES[key]
                # Criteria that do not match their regexp are silently dropped
                if not re.match(pattern, criteria_value):
                    continue

                if key == "d":
                    # Year range "[begin TO end]"
                    begin, end = criteria_value.strip("[]").split(" TO ")
                    parts.append(template.format(q_index, begin, q_index, end, q_index))
                    # A date consumes a criterion index, exactly as encode()
                    # numbers it; otherwise the next text criterion would
                    # overwrite qt{i} of the date.
                    q_index += 1
                elif key in CleanSearchURL._TEXT_KEYS:
                    # criteria_value must be URL-encoded to go through QueryDict
                    parts.append(template.format(q_index, quote_plus(criteria_value), q_index))
                    if key != encoding_key:
                        # Upper-case letter: NOT search
                        parts.append(f"not{q_index}=on")
                    q_index += 1
                else:
                    parts.append(template.format(quote_plus(criteria_value)))

            query_string = "".join("&" + part for part in parts)
            return path, QueryDict(query_string.encode("utf-8"))
        except Exception as err:
            raise Http404() from err

    @staticmethod
    def encode(dict: "QueryDict", path="") -> str:
        """
        Encode the parameters of a search request as a clean URL.

        @param dict: request parameters (qt{i}/q{i}/not{i}, q-f-{i}/q-l-{i}, f, page)
        @param path: path of the search; returned as is when there is no criterion
        @return: "/<path>?<clean url>" when path is not empty, else the clean url alone
        """
        sep = CleanSearchURL.separator
        criteria = []
        encoding = []

        # Search-form criteria: qt0, qt1, ... until the first missing index
        i = 0
        qti = dict.get("qt" + str(i), None)
        while qti:
            letter = CleanSearchURL._FIELD_LETTERS.get(qti)
            if letter is not None:
                if qti == "date":
                    first = dict.get("q-f-" + str(i), "*")
                    last = dict.get("q-l-" + str(i), "*")
                    value = f"[{first} TO {last}]"
                else:
                    # A missing q{i} is encoded like an empty criterion
                    value = dict.get("q" + str(i), None) or ""
                # The NOT flag only applies to the criterion of the same
                # index, never to a previously appended one.
                if dict.get("not" + str(i), None) == "on":
                    letter = letter.upper()
                criteria.append(value)
                encoding.append(letter)
            i += 1
            qti = dict.get("qt" + str(i), None)

        # Filters, passed as f=<key>:<value>
        for raw_filter in dict.getlist("f"):
            key, colon, value = raw_filter.partition(":")
            if not colon:
                # Malformed filter without ":"; nothing to encode
                continue
            if key == "year_facet":
                letter = "o" if value[:1] == "[" else "y"
            else:
                letter = CleanSearchURL._FILTER_LETTERS.get(key)
            if letter is not None:
                criteria.append(value)
                encoding.append(letter)

        # Pagination
        page = dict.get("page")
        if page:
            criteria.append(page)
            encoding.append("t")

        if not criteria:
            return path

        # Protect criteria holding the separator or a quote with surrounding
        # quotes, unless they are already protected.
        for index, item in enumerate(criteria):
            already_protected = item and item[0] == '"' and item[-1] == '"'
            if not already_protected and (sep in item or '"' in item):
                criteria[index] = f'"{item}"'

        clean_url = sep.join(quote(item, "") for item in criteria) + sep + "".join(encoding)
        path = path.strip("/")
        if path:
            return "/" + path + "?" + clean_url
        return clean_url

    def to_href(self):
        """Return the clean URL built from the base and the appended criteria."""
        quoted = self.separator.join(quote(item, "") for item in self._criteria)
        return self._base + "/" + quoted + self.separator + "".join(self._encoding)

318

319

320######################################################################

321# FACETS & SEARCH RESULTS

322######################################################################

class Facet:
    """
    Facet: a filter that you can select to narrow your search
    Example: "Journal article(25)" is a filter (of the colid_facets category)

    properties:
        name        Ex: "Journal article"
        count       Ex: 25
        active      Ex: True (tells if the user has selected the filter)
        href        The url to set on the filter.
                    It concatenates the filters selected before
                    Ex: http://www.numdam.org/items/?q=au:(choquet)&f=dt:%22Journal%20article%22&f=year:1991
        sub_facets  Facets nested under this one (empty list when none are given)

    Facets are returned in the SearchResultsGenerator
    """

    def __init__(
        self,
        name: str,
        count: int,
        state: str,
        filters: "set[str] | None" = None,
        path="/search/",
        sub_facets=None,
    ):
        self.name = name
        self.count = count
        self.active = state
        # A fresh list per instance: a shared mutable default would leak
        # sub-facets from one Facet to another.
        self.sub_facets = [] if sub_facets is None else sub_facets

        if filters:
            joined = "&f=".join([quote_plus(x) for x in filters])
            # The result looks like /search/?q=test&qt=all&f=....
            # (path is expected to already carry its query string)
            query = f"{path}&f={joined}"
        else:
            query = path

        url = urlparse(query)
        params = QueryDict(url.query.encode("utf-8"), True, "utf-8")
        href = CleanSearchURL.encode(params, url.path)
        if hasattr(settings, "SITE_URL_PREFIX"):
            href = f"/{settings.SITE_URL_PREFIX}" + href
        self.href = href

366

367

def create_facets_in_category(
    solr_results: Results,
    category: str,
    active_filters: set[str],
    path: str,
    sort=False,
    reverse=False,
) -> list[Facet]:
    """
    Build the Facet objects of one facet category from the Solr results.

    @param solr_results: Solr results; its facets["facet_fields"] is read
    @param category: name of the facet field (Ex: "year_facet", "ar", "dt")
    @param active_filters: filters currently selected; not modified
    @param path: search path given to every Facet to build its href
    @param sort: sort the facets by name when True
    @param reverse: sort order, only used when sort is True
    @return: one Facet per value returned by Solr, [] if the category is absent
    """
    facet_fields = solr_results.facets["facet_fields"]
    if category not in facet_fields:
        return []

    # Solr returns the facets of a category in a flat list
    #     Example: [ "1878", 1, "1879", 0, "1912", 3, "1971", 5 ]
    # Pair the names (even positions) with the counts (odd positions)
    #     => [ ("1878", 1), ("1879", 0), ... ]
    flat = facet_fields[category]
    solr_facets = list(zip(flat[0::2], flat[1::2]))

    if sort:
        solr_facets = sorted(solr_facets, key=lambda x: x[0], reverse=reverse)

    results = []
    active_filters = active_filters.copy()

    if category == "year_facet":
        # Selecting a year facet clears the active year range facet (if any).
        # Prefix test: a substring test would also match another filter whose
        # value merely contains the text.
        year_range = next((v for v in active_filters if v.startswith("year_facet:[")), None)
        if year_range is not None:
            active_filters.remove(year_range)

    ar_active_filter = None
    if category == "ar":
        # Prefix test for the same reason: 'collection_title_facet:"Bazar: ..."'
        # contains "ar:" but is not an author filter.
        ar_active_filter = next((v for v in active_filters if v.startswith("ar:")), None)

    for facet_name, count in solr_facets:
        this_filters = active_filters.copy()
        v = '{}:"{}"'.format(category, facet_name.replace('"', '\\"'))
        if category == "sites":
            # Solr returns the site id; display the site name instead
            facet_name = [
                SITE_REGISTER[key]["name"]
                for key in SITE_REGISTER
                if str(SITE_REGISTER[key]["site_id"]) == facet_name
            ][0]
        if v in active_filters:
            # Clicking on an active facet removes its filter
            this_filters.remove(v)
            results.append(Facet(facet_name, count, "active", this_filters, path))
        else:
            # Selecting several author facets at once is not allowed
            if category == "ar" and ar_active_filter is not None:
                this_filters.remove(ar_active_filter)
            this_filters.add(v)
            results.append(Facet(facet_name, count, "not-active", this_filters, path))

    return results

443

444

def create_year_range_facets(
    solr_results: Results, year_facets: list[Facet], active_filters: set[str], path: str
) -> list[Facet]:
    """
    Build the year range facets and attach the matching year facets to them.

    The ranges come from facets["facet_ranges"]["year_facet"] and are walked
    from the highest start year down. `year_facets` is consumed in order with
    a single cursor: a year facet goes under the current range while its year
    is greater than or equal to the start of that range.

    @return: one Facet per range, labelled "<start>-<end>"
    """
    year_ranges = solr_results.facets["facet_ranges"]["year_facet"]
    gap = year_ranges["gap"]
    flat = year_ranges["counts"]
    buckets = sorted(zip(flat[0::2], flat[1::2]), key=lambda pair: pair[0], reverse=True)

    current_year = datetime.datetime.now().year
    total = len(year_facets)
    cursor = 0  # index of the next year facet not yet attached to a range
    range_facets = []

    for range_start, count in buckets:
        first_year = int(range_start)
        last_year = first_year + gap - 1
        # A range never extends past the current year
        range_end = str(current_year) if last_year > current_year else str(last_year)

        if cursor < total:
            candidate = year_facets[cursor]
            candidate_year = int(candidate.name)

        nested = []
        selection = active_filters.copy()

        while cursor < total and candidate_year >= first_year:
            nested.append(candidate)

            # Clicking on a year range facet clears the active year facet (if any)
            single_year = 'year_facet:"' + candidate.name + '"'
            if single_year in active_filters:
                selection.remove(single_year)

            cursor += 1
            if cursor < total:
                candidate = year_facets[cursor]
                candidate_year = int(candidate.name)

        label = range_start + "-" + str(range_end)
        range_filter = "year_facet:[" + range_start + " TO " + range_end + "]"

        if range_filter in active_filters:
            selection.remove(range_filter)
            state = "active"
        else:
            selection.add(range_filter)
            state = "not-active"
        range_facets.append(Facet(label, count, state, selection, path, nested))

    return range_facets

505

506

def create_facets(
    solr_results: Results, path: str, filters: list[str], use_ar_facet=True
) -> dict[str, list[Facet]]:
    """
    Build every facet category displayed next to the search results.

    @param filters: the filters currently selected
    @param use_ar_facet: when False, "author_facets" is an empty list
    @return: dict of facet lists keyed by "author_facets", "msc_facets",
             "year_range_facets", "dt_facets", "atype_facets",
             "collection_facets" and "sites_facets"
    """
    selected = set(filters)

    def in_category(category, **options):
        return create_facets_in_category(solr_results, category, selected, path, **options)

    atype_facets = in_category("classname")
    author_facets = in_category("ar") if use_ar_facet else []
    dt_facets = in_category("dt")
    msc_facets = in_category("msc_facet")
    collection_facets = in_category("collection_title_facet")
    sites_facets = in_category("sites")
    year_facets = in_category("year_facet", sort=True, reverse=True)

    # A single, active year facet is shown as is; otherwise the years are
    # grouped under year ranges.
    only_one_active_year = len(year_facets) == 1 and year_facets[0].active == "active"
    if only_one_active_year:
        year_range_facets = year_facets
    else:
        year_range_facets = create_year_range_facets(solr_results, year_facets, selected, path)

    return {
        "author_facets": author_facets,
        "msc_facets": msc_facets,
        "year_range_facets": year_range_facets,
        "dt_facets": dt_facets,
        "atype_facets": atype_facets,
        "collection_facets": collection_facets,
        "sites_facets": sites_facets,
    }

542

543

class SearchResults:
    """
    Search results.
    Hold data returned by Solr
    Intermediary between solr_results and the Django template to display the results

    Attributes set by __init__: facets, hits, docs and filters.
    """

    # def __init__(self, solr_results, path, filters, sort): -> if sorting
    # gets enabled

    def fix_truncated_value(self, value: str) -> str:
        """
        Highlighting may produce an HTML string truncated at the end.
        To display the search keywords in bold, HTML tags are added around them.
        But we ask the template to display the highlight as |safe such that
        unclosed HTML tags will damage the HTML page layout.
        => fix_truncated_value attempts to add the missing HTML end tags.

        The value is scanned once: tag names opened outside of quotes are
        pushed on a stack and popped when the matching end tag is met. An
        unterminated quote is closed, then one end tag is appended per tag
        name still on the stack.

        9/03/2023 - This cannot work properly. We should use a parser or something
        to correctly do this.
        """
        keywords = []  # stack of the tag names currently open
        i = 0
        quote = ""  # quote character we are inside of, "" when outside
        while i < len(value):
            if value[i] == '"':
                if quote == '"':
                    quote = ""
                else:
                    quote = '"'
            elif value[i] == "'":
                if quote == "'":
                    quote = ""
                else:
                    quote = "'"

            keyword = ""
            end_keyword = False
            if not quote and value[i] == "<":
                i += 1

                if i < len(value) and value[i] == "/":
                    end_keyword = True
                    i += 1

                # The tag name runs up to the first space or ">"
                while i < len(value) and value[i] != " " and value[i] != ">":
                    keyword += value[i]
                    i += 1

                if keyword and end_keyword:
                    # An end tag only closes the most recently opened tag
                    if len(keywords) > 0 and keywords[-1] == keyword:
                        keywords.pop(-1)
                elif keyword:
                    keywords.append(keyword)

            i += 1

        if quote:
            value += quote

        while len(keywords) > 0:
            keyword = keywords.pop(-1)
            value += "</" + keyword + ">"

        return value

    def __init__(
        self,
        solr_results: Results,
        path: str,
        filters: list[str],
        qt: list[str],
        use_ar_facet=True,
    ):
        """
        Build the facets and decorate every Solr document for display.

        @param solr_results: Solr results (hits, docs, facets, highlighting are read)
        @param path: search path used to build the facet links
        @param filters: the filters currently selected
        @param qt: the field types searched; decides which highlights are shown
        @param use_ar_facet: forwarded to create_facets
        """
        self.facets = create_facets(solr_results, path, filters, use_ar_facet)
        self.hits = solr_results.hits
        self.docs = solr_results.docs

        cur_language = translation.get_language()
        # Fields looked up for a highlight, in order of preference
        preferred_highlight_keywords = [
            "abstract_tex",
            "trans_abstract_tex",
            "kwd",
            "trans_kwd",
            "body",
            "bibitem",
        ]
        if cur_language != "fr":
            # For any other language, the trans_* fields come first
            preferred_highlight_keywords = [
                "trans_abstract_tex",
                "abstract_tex",
                "trans_kwd",
                "kwd",
                "body",
                "bibitem",
            ]

        # References is bibitem
        qt = [s.replace("references", "bibitem") for s in qt]

        # Searching one of kwd / trans_kwd also allows the other one
        if any("kwd" in s for s in qt) and not any("trans_kwd" in s for s in qt):
            qt.append("trans_kwd")
        elif any("trans_kwd" in s for s in qt) and not any("kwd" in s for s in qt):
            qt.append("kwd")

        # Same for abstract_tex / trans_abstract_tex
        if any("abstract_tex" in s for s in qt) and not any("trans_abstract_tex" in s for s in qt):
            qt.append("trans_abstract_tex")
        elif any("trans_abstract_tex" in s for s in qt) and not any(
            "abstract_tex" in s for s in qt
        ):
            qt.append("abstract_tex")

        # We do not call the translation mechanism on a specific language
        # try:
        #     translation.activate('en')
        #     text = translation.gettext(u"Résumé")
        # finally:
        #     translation.activate(cur_language)

        # We get the translation based on the current language
        abstract_text = translation.gettext("Résumé")
        reference_text = translation.gettext("Bibliographie")
        keywords_text = translation.gettext("Mots clés")
        fulltext_text = translation.gettext("Plein texte")

        # Highlighted field -> label displayed next to the highlight
        correspondance = {
            "abstract_tex": abstract_text,
            "trans_abstract_tex": abstract_text,
            "kwd": keywords_text,
            "trans_kwd": keywords_text,
            "body": fulltext_text,
            "bibitem": reference_text,
        }

        for index, doc in enumerate(self.docs):
            id_doc = doc["id"]
            doc["embargo"] = resolver.embargo(doc["wall"], doc["year"])
            hl = solr_results.highlighting[id_doc]
            for key in ["au", "year"]:
                if key in hl:
                    # Find the plain text of the highlight in the stored value
                    # and substitute the highlighted version for it.
                    # NOTE(review): both replace() calls show empty-string
                    # arguments here; they look like highlight-tag literals
                    # lost when this text was extracted - confirm against the
                    # repository before relying on these two lines.
                    the_hl = hl[key][0]
                    the_hl = the_hl.replace("", "")
                    the_hl = the_hl.replace("", "")
                    value = doc[key]
                    pos = value.find(the_hl)
                    if pos > -1:
                        value = value.replace(the_hl, hl[key][0])

                    doc[key] = value

            for key in ["collection_title_tex", "collection_title_html"]:
                # Only the first collection title is kept
                value = doc[key][0]
                if key in hl:
                    # NOTE(review): same remark as above on the replace() arguments
                    the_hl = hl[key][0]
                    the_hl = the_hl.replace("", "")
                    the_hl = the_hl.replace("", "")
                    pos = value.find(the_hl)
                    if pos > -1:
                        value = value.replace(the_hl, hl[key][0])

                doc[key] = value

            for key in preferred_highlight_keywords:
                doc["highlighting"] = {}
                doc["highlighting"]["value"] = ""
                if key in hl and ("all" in qt or key in qt):
                    doc["highlighting"]["field"] = correspondance[key]
                    for value in hl[key]:
                        if key == "bibitem":
                            value = self.fix_truncated_value(value)
                        doc["highlighting"]["value"] = (
                            doc["highlighting"]["value"] + "... " + value + " ... "
                        )
                    # Only the first preferred field with a highlight is kept
                    break
            # TODO: the highlight on bibitem (or even full text) is wanted
            # only if nothing else matches
            if settings.SITE_NAME == "cr" and "sites" in doc and doc["sites"]:
                # Prefix the urls with the second "/"-separated part of the
                # site_domain registered for the first site of the document
                site_id = doc["sites"][0]
                site_domain = [
                    SITE_REGISTER[key]["site_domain"]
                    for key in SITE_REGISTER
                    if SITE_REGISTER[key]["site_id"] == site_id
                ][0]
                prefix = site_domain.split("/")[1]
                if "doi" in doc:
                    url = reverse("article", kwargs={"aid": doc["doi"]})
                else:
                    url = reverse("item_id", kwargs={"pid": doc["pid"]})
                doc_url = "/" + prefix + url
                doc["item_url"] = doc_url
                if doc["pdf"].find("/" + prefix) != 0:
                    doc["pdf"] = "/" + prefix + doc["pdf"]
                    if "tex" in doc:
                        doc["tex"] = "/" + prefix + doc["tex"]
            elif hasattr(settings, "SITE_URL_PREFIX"):
                if doc["pdf"].find("/" + settings.SITE_URL_PREFIX) != 0:
                    doc["pdf"] = "/" + settings.SITE_URL_PREFIX + doc["pdf"]
                    if "tex" in doc:
                        doc["tex"] = "/" + settings.SITE_URL_PREFIX + doc["tex"]

            self.docs[index] = doc

        self.filters = "&f=".join(filters)
        # self.sort = sort -> if sorting gets enabled

748

749

class SearchInternalResults:
    """
    Search results for sorted Books.
    Hold data returned by Solr
    Intermediary between solr_results and the Django template to display the results
    """

    # def __init__(self, solr_results, path, filters, sort): -> if sorting
    # gets enabled
    def __init__(
        self, solr_results: Results, path: str, filters: list[str], facet_fields: list[str]
    ):
        """
        Build the facets requested in `facet_fields` and decorate the documents.

        A facet category that is not requested is set to None in self.facets.
        """
        remaining = set(filters)

        collection_facets = None
        if "collection_title_facet" in facet_fields:
            collection_facets = create_facets_in_category(
                solr_results, "collection_title_facet", remaining, path
            )

        author_facets = None
        if "author_facet" in facet_fields:
            author_facets = create_facets_in_category(solr_results, "ar", remaining, path)

        year_range_facets = None
        if "year_facet" in facet_fields:
            year_facets = create_facets_in_category(
                solr_results, "year_facet", remaining, path, sort=True, reverse=True
            )
            single_active = len(year_facets) == 1 and year_facets[0].active == "active"
            if single_active:
                year_range_facets = year_facets
            else:
                year_range_facets = create_year_range_facets(
                    solr_results, year_facets, remaining, path
                )

        letter_facets = None
        if "firstLetter" in facet_fields:
            # The first-letter filter is taken out of the remaining filters:
            # each letter facet adds its own one back.
            selected_letter_filter = ""
            for candidate in filters:
                if candidate.startswith("{!tag=firstletter}firstNameFacetLetter:"):
                    remaining.remove(candidate)
                    selected_letter_filter = candidate

            flat = solr_results.facets["facet_fields"]["firstNameFacetLetter"]
            counts = dict(zip(flat[0::2], flat[1::2]))

            letter_facets = []
            a_letter_is_selected = False
            for letter in string.ascii_uppercase:
                letter_filter = f"{{!tag=firstletter}}firstNameFacetLetter:{letter}"
                if letter not in counts:
                    # Letter without any result
                    letter_facets.append(Facet(letter, 0, "disabled", "", path))
                elif letter_filter == selected_letter_filter:
                    # Letter present in the results and currently selected
                    letter_facets.append(
                        Facet(letter, counts[letter], "active", remaining, path)
                    )
                    a_letter_is_selected = True
                else:
                    with_letter = remaining.copy()
                    with_letter.add(letter_filter)
                    letter_facets.append(
                        Facet(letter, counts[letter], "not-active", with_letter, path)
                    )

            # "All" is the active entry as long as no letter is selected
            all_state = "not-active" if a_letter_is_selected else "active"
            letter_facets.append(Facet("All", 1, all_state, remaining, path))

        self.facets = {
            "year_range_facets": year_range_facets,
            "letter_facets": letter_facets,
            "collection_title_facets": collection_facets,
            "author_facets": author_facets,
        }

        self.hits = solr_results.hits
        self.docs = solr_results.docs
        self.filters = "&f=".join(filters)
        # self.sort = sort -> if sorting gets enabled

        for position, document in enumerate(self.docs):
            document["embargo"] = resolver.embargo(document["wall"], document["year"])
            self.docs[position] = document

Coverage for src/ptf/solr/search_helpers.py: 90%

422 statements