Coverage for src/ptf/cmds/solr_cmds.py: 84%

468 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 09:56 +0000

1import pysolr 

2 

3from django.conf import settings 

4 

5from ptf.cmds.base_cmds import baseCmd 

6from ptf.cmds.base_cmds import make_int 

7from ptf.display import resolver 

8from ptf.site_register import SITE_REGISTER 

9from ptf.solr import search_helpers 

10from ptf.utils import get_display_name 

11 

12# Not used so far. 

13# nlm2solr use normalize-space for volume and volume-series, 

14# but make_int is called to convert into int: spaces are also trimmed 

15# def normalize_whitespace(str): 

16# import re 

17# str = str.strip() 

18# str = re.sub(r'\s+', ' ', str) 

19# return str 

20 

21 

class solrFactory:
    """Holder for a single, lazily created ``pysolr.Solr`` client shared by all commands."""

    # Cached client; created on the first call to get_solr().
    solr = None
    # URL used to create the client; taken from settings.SOLR_URL when left unset.
    solr_url = None

    @staticmethod
    def get_solr():
        """Return the cached client, creating it (10 s timeout) on first use."""
        if solrFactory.solr is not None:
            return solrFactory.solr

        if solrFactory.solr_url is None:
            solrFactory.solr_url = settings.SOLR_URL
        solrFactory.solr = pysolr.Solr(solrFactory.solr_url, timeout=10)
        return solrFactory.solr

    @staticmethod
    def do_solr_commit():
        """Commit pending changes, unless settings.IGNORE_SOLR is set and truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        solrFactory.get_solr().commit()

    @staticmethod
    def do_solr_rollback():
        """Send a rollback message, unless settings.IGNORE_SOLR is set and truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        client = solrFactory.get_solr()
        client._update("<rollback />")

    @staticmethod
    def reset():
        """Close the cached client's session (if any) and forget the client."""
        client = solrFactory.solr
        if client:
            client.get_session().close()
        solrFactory.solr = None

56 

57 

def solr_add_contributors_to_data(contributors, data):
    """Fill the author-related entries of ``data`` from ``contributors``.

    Does nothing when ``contributors`` is None. Otherwise only contributors whose
    role is author, editor or translator are used, and these keys are written:

    - ``au``: display names joined with "; "
    - ``ar``: reference names (the contributor's ``mid`` when set, else the display name)
    - ``aul``: last names
    - ``fau``: first display name (only written when there is at least one)
    """
    if contributors is None:
        return

    display_names = []
    ref_names = []
    last_names = []

    for contributor in contributors:
        if contributor["role"] not in ("author", "editor", "translator"):
            continue

        display_name = get_display_name(
            "",
            contributor["first_name"],
            contributor["last_name"],
            "",
            contributor["string_name"],
        )
        ref_name = contributor["mid"] or display_name

        if ref_name:
            ref_names.append(ref_name)
        if display_name:
            display_names.append(display_name)
        if contributor["last_name"]:
            last_names.append(contributor["last_name"])

    data["au"] = "; ".join(display_names)
    # Authors as used for references
    data["ar"] = ref_names
    # Surnames / last names
    data["aul"] = last_names

    if display_names:
        data["fau"] = display_names[0]

86 

87 

def solr_add_kwds_to_data(kwds, data):
    """Split ``kwds`` into the ``kwd``, ``trans_kwd`` and ``msc`` entries of ``data``.

    Keywords whose type is not "msc" are joined with ", ": those with lang "fr"
    go to ``kwd``, all others to ``trans_kwd``. Values of type "msc" are
    upper-cased and stored as a list under ``msc``.
    """

    def joined_values(french):
        # One pass over the non-MSC keywords of the requested language group.
        return ", ".join(
            entry["value"]
            for entry in kwds
            if entry["type"] != "msc" and (entry["lang"] == "fr") == french
        )

    data["kwd"] = joined_values(True)
    data["trans_kwd"] = joined_values(False)
    data["msc"] = [entry["value"].upper() for entry in kwds if entry["type"] == "msc"]

96 

97 

98##################################################################### 

99# 

100# solrCmd: base class for Solr commands 

101# 

102###################################################################### 

class solrCmd(baseCmd):
    """Base class for Solr commands: do() and undo() become no-ops returning None
    when settings.IGNORE_SOLR is set and truthy."""

    def __init__(self, params={}):
        super().__init__(params)

    def do(self, parent=None):
        """Run the command through the base class, or return None when Solr is ignored."""
        solr_ignored = getattr(settings, "IGNORE_SOLR", False)
        return None if solr_ignored else super().do(parent)

    def post_do(self, resource=None):
        """Delegate to the base class."""
        super().post_do(resource)

    def undo(self):
        """Undo the command through the base class, or return None when Solr is ignored."""
        solr_ignored = getattr(settings, "IGNORE_SOLR", False)
        return None if solr_ignored else super().undo()

121 

122 

123##################################################################### 

124# 

125# solrDeleteCmd: generic to delete Solr documents, based on a query 

126# 

127###################################################################### 

class solrDeleteCmd(solrCmd):
    """Delete every Solr document matching the query ``q`` (required parameter)."""

    def __init__(self, params={}):
        # Defaults, set before the base class initializer runs.
        self.commit = True
        self.q = None

        super().__init__(params)

        self.required_params.append("q")

    def internal_do(self):
        """Issue the delete-by-query; always returns None."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.delete(q=self.q, commit=self.commit)

        return None

143 

144 

145##################################################################### 

146# 

147# solrAddCmd: base class for Solr Add commands 

148# 

149###################################################################### 

class solrAddCmd(solrCmd):
    """Base class for commands that add one document (``self.data``) to Solr.

    ``id`` and ``pid`` are required to add; ``id`` is required to delete.
    """

    def __init__(self, params={}):
        # Defaults, set before the base class initializer runs.
        self.commit = True
        self.db_obj = None
        self.id = None
        self.pid = None
        self.data = {}

        super().__init__(params)

        self.required_params.extend(["id", "pid"])
        self.required_delete_params.append("id")

    def pre_do(self):
        """Store id/pid in the document, then delete the existing documents with this pid."""
        super().pre_do()

        self.data.update(id=self.id, pid=self.pid)
        # Sometimes, after errors and/or simultaneous uploads, several records exist
        # for one PID. To avoid several search results for the same PID, everything
        # with this PID is deleted before internal_do runs.
        purge_cmd = solrDeleteCmd({"q": "pid:" + self.pid})
        purge_cmd.do()

    def internal_do(self):
        """Send the document to Solr; always returns None."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.add(docs=[self.data], commit=self.commit)

        return None

    def internal_undo(self):
        """Delete the document by id and return the base class's internal_undo() result."""
        undone = super().internal_undo()

        client = solrFactory.get_solr()
        client.delete(id=self.id, commit=self.commit)

        return undone

188 

189 

190##################################################################### 

191# 

192# addResourceSolrCmd: base class for solrAddCmds adding a Resource 

193# 

194###################################################################### 

class addResourceSolrCmd(solrAddCmd):
    """Base class for the commands that index a resource described by ``xobj``."""

    def __init__(self, params={}):
        self.xobj = None  # model_data object

        # Attributes of xobj copied as-is into the Solr document (when present)
        self.fields = [
            "lang",
            "doi",
            "title_tex",
            "title_html",
            "trans_title_tex",
            "trans_title_html",
            "abstract_tex",
            "abstract_html",
            "trans_abstract_tex",
            "trans_abstract_html",
            "collection_title_tex",
            "collection_title_html",
            "collection_id",
            "year",
            "body",
            "bibitem",
        ]

        # Used to filter the articles based on their site
        self.sites = None

        super().__init__(params)

        self.required_params.append("xobj")

    def add_collection(self, collection):
        """Record the collection id, its titles and the document type derived from its coltype."""
        self.data["collection_id"] = collection.id

        for flavour in ("tex", "html"):
            title = getattr(collection, "title_" + flavour)
            self.data.setdefault("collection_title_" + flavour, []).append(title)

        # classname is used only by PCJ for the article types
        coltype = collection.coltype
        if coltype == "journal":
            self.data["dt"] = ["Article de revue"]
        elif coltype == "acta":
            self.data["dt"] = ["Acte de séminaire"]
        else:
            labels = {
                "thesis": "Thèse",
                "lecture-notes": "Notes de cours",
                "proceeding": "Acte de rencontre",
            }
            label = labels.get(coltype, "Livre")
            self.data["classname"] = label
            self.data["dt"] = [label]

    def add_abstracts_to_data(self):
        """Copy each abstract's tex/html values; abstracts not in "fr" go to the trans_ fields."""
        for abstract in self.xobj.abstracts:
            prefix = "abstract_" if abstract["lang"] == "fr" else "trans_abstract_"

            for flavour in ("tex", "html"):
                self.data[prefix + flavour] = abstract["value_" + flavour]

    def add_year_to_data(self, year):
        """Set ``year_facet``; for a "first-second" value the part after the first dash is used."""
        if not year:
            return

        parts = str(year).split("-")
        self.data["year_facet"] = int(parts[1]) if len(parts) > 1 else int(year)

    def pre_do(self):
        """Build the Solr document from xobj (and db_obj, when given).

        Raises ValueError when no ``dt`` has been set on the document.
        """
        super().pre_do()

        missing = object()
        for name in self.fields:
            value = getattr(self.xobj, name, missing)
            if value is not missing:
                self.data[name] = value

        self.add_abstracts_to_data()
        solr_add_kwds_to_data(self.xobj.kwds, self.data)
        solr_add_contributors_to_data(self.xobj.contributors, self.data)

        if "dt" not in self.data:
            raise ValueError(f"add SolR resource without dt - {self.xobj.pid}")

        # year either comes directly from xobj (container) or from set_container
        self.add_year_to_data(self.data["year"])

        if self.db_obj is not None:
            # Solr field holding the link, per supported stream mimetype
            solr_fields = {
                "application/pdf": "pdf",
                "image/x.djvu": "djvu",
                "application/x-tex": "tex",
            }
            for stream in self.xobj.streams:
                mimetype = stream["mimetype"]
                if mimetype not in solr_fields:
                    continue
                self.data[solr_fields[mimetype]] = self.db_obj.get_binary_file_href_full_path(
                    "self", mimetype, stream["location"]
                )

            self.data["wall"] = self.db_obj.get_wall()

        self.data["sites"] = self.sites or [settings.SITE_ID]

315 

316 

317##################################################################### 

318# 

319# addContainerSolrCmd: adds/removes a container (issue/book) 

320# 

321# A container needs a collection (collection_title_tex etc.) 

322# 

323###################################################################### 

class addContainerSolrCmd(addResourceSolrCmd):
    """Index a container (issue/book); ``ctype`` is copied from xobj in addition to the base fields."""

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(["ctype"])

    def pre_do(self):
        """Add the integer ``volume``, ``number`` and ``vseries`` of the container.

        When xobj has a non-empty ``incollection``, its first entry takes
        precedence: ``vseries`` comes from that entry, ``volume`` is forced to 0
        and ``number`` receives the entry's volume.
        """
        super().pre_do()

        for field in ["volume", "number", "vseries"]:
            if hasattr(self.xobj, field):
                # Each value goes to its own key. Writing all three to "volume"
                # lost number/vseries and left "volume" holding the last one found.
                self.data[field] = make_int(getattr(self.xobj, field))

        if hasattr(self.xobj, "incollection") and len(self.xobj.incollection) > 0:
            incol = self.xobj.incollection[0]
            self.data["vseries"] = make_int(incol.vseries)
            self.data["volume"] = 0
            self.data["number"] = make_int(incol.volume)

346 

347 

348##################################################################### 

349# 

350# addArticleSolrCmd: adds/removes an article 

351# 

352# an article needs a container (container_id) that needs a collection (collection_id) 

353# 

354###################################################################### 

355 

356 

class addArticleSolrCmd(addResourceSolrCmd):
    """Index an article; page, container and numbering fields are copied in addition to the base fields."""

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_id", "volume", "number", "vseries", "article_number"]
        )

    def set_container(self, container):
        """Copy the container's id, year and integer vseries/volume/number into the document."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["vseries"] = make_int(container.vseries)
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)

    def set_eprint(self, eprint):
        """Tag the document as an e-print; ``dt`` must already be set."""
        self.data["dt"].append("e-print")

    def set_source(self, source):
        """No-op."""
        pass

    def set_thesis(self, thesis):
        """Tag the document as a thesis; ``dt`` must already be set."""
        self.data["dt"].append("thesis")

    def set_original_article(self, article):
        """No-op for now."""
        # TODO Replace some data (ie doi, pid) with the original article
        pass

    def pre_do(self):
        """Set ``classname`` from the article type and normalise ``page_range``.

        ``page_range`` is built as "p. <fpage>-<lpage>" when xobj has no page
        range, gets a "p. " prefix when it does not start with "p", and is kept
        unchanged otherwise.
        """
        super().pre_do()

        self.data["classname"] = resolver.ARTICLE_TYPES.get(
            self.xobj.atype, "Article de recherche"
        )

        page_range = self.xobj.page_range
        if not page_range:
            fpage, lpage = self.xobj.fpage, self.xobj.lpage
            text = "p. "
            if fpage is not None:
                text += fpage
            if fpage and lpage:
                text += "-"
            if lpage is not None:
                text += lpage
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # Already prefixed: keep it. This case used to leave an empty string.
            text = page_range

        self.data["page_range"] = text

404 

405 

406##################################################################### 

407# 

408# addBookPartSolrCmd: adds/removes a book part (similar to an article) 

409# 

410# a book part needs a collection id (array) 

411# 

412###################################################################### 

class addBookPartSolrCmd(addResourceSolrCmd):
    """Index a book part; page, container-title and numbering fields are copied in addition to the base fields."""

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_title_tex", "container_title_html", "volume", "number"]
        )

    def set_container(self, container):
        """Copy the container's id, year, integer volume/number and titles into the document."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)
        self.data["container_title_tex"] = container.title_tex
        self.data["container_title_html"] = container.title_html

    def pre_do(self):
        """Set ``classname`` and normalise ``page_range``.

        ``page_range`` is built as "p. <fpage>-<lpage>" when xobj has no page
        range, gets a "p. " prefix when it does not start with "p", and is kept
        unchanged otherwise.
        """
        super().pre_do()

        self.data["classname"] = "Chapitre de livre"

        page_range = self.xobj.page_range
        if not page_range:
            fpage, lpage = self.xobj.fpage, self.xobj.lpage
            text = "p. "
            if fpage is not None:
                text += fpage
            if fpage and lpage:
                text += "-"
            if lpage is not None:
                text += lpage
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # Already prefixed: keep it. This case used to leave an empty string.
            text = page_range

        self.data["page_range"] = text

446 

447 

448##################################################################### 

449# 

450# solrSearchCmd: 

451# 

452# called from ptf/views.py; SolrRequest(request, q, alias=alias, 

453# site=site, 

454# default={'sort': '-score'}) 

455# 

456# Warning: As of July 2018, only 1 site id is stored in a SolR document 

457# Although the SolR schema is already OK to store multiple sites ("sites" is an array) 

458# no Solr commands have been written to add/remove sites 

459# We only have add commands. 

460# Search only works if the Solr instance is meant for individual or ALL sites 

461# 

462###################################################################### 

class solrSearchCmd(solrCmd):
    """Run a faceted, highlighted edismax search built from a list of criteria.

    ``qs`` is required. Each of its items is read through the keys ``name``,
    ``value``, ``not``, ``first`` and ``last``.
    """

    def __init__(self, params={}):
        self.q = ""
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = "-score"  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""

        super().__init__(params)

        self.required_params.extend(["qs"])

    def get_q(self, name, value, exclude, first, last):
        """Return the query fragment for one criterion.

        - name "all" with value "*" gives "*:*".
        - name "date" gives a ``year:[first TO last]`` range.
        - otherwise the fragment is an optional field prefix followed by the
          value: kept as-is when enclosed in double quotes, else its
          whitespace-separated terms joined with AND.
        A truthy ``exclude`` prefixes the fragment with "-".
        """
        if name == "all" and value == "*":
            return "*:*"

        if value == "*":
            value = ""

        q = "-" if exclude else ""

        if name == "date":
            return q + "year:[" + first + " TO " + last + "]"

        # Solr field targeted by each criterion name; other names get no prefix.
        field_prefixes = {
            "author": "au:",
            "author_ref": "ar:",
            "title": "title_tex:",
            "body": "body:",
            "references": "bibitem:",
            "abstract": "trans_abstract_tex:",
        }
        q += field_prefixes.get(name, "")

        if len(value) > 0 and value[0] == '"' and value[-1] == '"':
            q += value
        elif name == "kwd":
            terms = " AND ".join(value.split())
            q += "(kwd:(" + terms + ") OR trans_kwd:(" + terms + "))"
        else:
            q += "(" + " AND ".join(value.split()) + ")"

        return q

    def internal_do(self) -> search_helpers.SearchResults:
        """Build the filters, sort, query and facets, run the search and wrap the results."""
        super().internal_do()

        if settings.COLLECTION_PID == "CR":
            cr_ids = ["CRMATH", "CRMECA", "CRPHYS", "CRCHIM", "CRGEOS", "CRBIOL"]
            ids = [SITE_REGISTER[item.lower()]["site_id"] for item in cr_ids]
            self.filters.append(f"sites:[{min(ids)} TO {max(ids)}]")
        elif settings.COLLECTION_PID != "ALL":
            self.filters.append(f"sites:{settings.SITE_ID}")

        sort = "score desc"
        if self.sort:
            sorts = []
            for spec in self.sort.split(","):
                spec = spec.strip()
                if not spec:
                    # Blank criterion (e.g. trailing comma): skip it; indexing
                    # spec[0] here used to raise IndexError.
                    continue
                if spec.startswith("-"):
                    sorts.append(f"{spec[1:]} desc")
                else:
                    sorts.append(f"{spec} asc")
            sorts.append("year desc")
            sort = ", ".join(sorts)

        use_ar_facet = True
        q = ""
        qt = []
        if self.qs:
            for qi in self.qs:
                qt.append(qi["name"])
                if qi["name"] == "author_ref":
                    use_ar_facet = False
                if qi["value"] or qi["first"]:
                    new_q = self.get_q(qi["name"], qi["value"], qi["not"], qi["first"], qi["last"])
                    q += new_q + " "
        if q:
            self.q = q

        facet_fields = ["collection_title_facet", "msc_facet", "dt", "year_facet"]

        if use_ar_facet:
            facet_fields.append("ar")

        if settings.COLLECTION_PID == "CR":
            facet_fields.append("sites")
        elif settings.COLLECTION_PID == "PCJ":
            facet_fields.append("classname")

        params = {
            "q.op": "AND",
            "sort": sort,
            "facet.field": facet_fields,
            # Decades are built manually because we allow the user to
            # expand a decade and see individual years
            "facet.range": "year_facet",
            "f.year_facet.facet.range.start": 0,
            "f.year_facet.facet.range.end": 3000,
            "f.year_facet.facet.range.gap": 10,
            "facet.mincount": 1,
            "facet.limit": 100,
            "facet.sort": "count",
            # 'fl': '*,score',  # for debugging
            # 'debugQuery': 'true',  # for debugging
            "hl": "true",
            # 'hl.fl': "*", -> by default, the fields of qf are returned
            "hl.snippets": 1,
            "hl.fragsize": 300,
            "hl.simple.pre": "<strong>",
            "hl.simple.post": "</strong>",
            "defType": "edismax",
            # When tie is not specified, the score is the max of the per-field
            # scores; here 0.1 x the score of the other fields is added.
            "tie": 0.1,
            # "df": 'text',  Not used with dismax queries
            # We want to retrieve the highlights in both _tex and _html.
            # We need to specify the 2 in qf
            "qf": [
                "au^21",
                "title_tex^13",
                "title_html^13",
                "trans_title_tex^13",
                "trans_title_html^13",
                "abstract_tex^8",
                "trans_abstract_tex^8",
                "kwd^5",
                "trans_kwd^5",
                "collection_title_html^3",
                "collection_title_tex^3",
                "body^2",
                "bibitem",
            ],
            # Field ar is multivalued and dedicated to facets.
            # Field au is used for searching and for displaying the results.
        }

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)

        search_results = search_helpers.SearchResults(
            solr_results, self.search_path, self.filters, qt, use_ar_facet
        )

        return search_results

635 

636 

637##################################################################### 

638# 

639# solrInternalSearchCmd: 

640# 

641# called from ptf/views.py/book by author 

642# 

643###################################################################### 

class solrInternalSearchCmd(solrCmd):
    """Run a search with caller-chosen facet fields; ``q`` is required (defaults to "*:*")."""

    def __init__(self, params={}):
        self.q = "*:*"
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = None  # '-score'  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""
        self.facet_fields = []
        self.facet_limit = 100
        self.fl = None
        self.create_facets = True
        # 10/03/2023 - UNUSED
        self.related_articles = False

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self) -> search_helpers.SearchInternalResults | pysolr.Results:
        """Run the search.

        Returns a ``SearchInternalResults`` wrapper when ``create_facets`` is
        truthy, otherwise the raw pysolr results.
        """
        super().internal_do()

        # 10/03/2023 - UNUSED
        if self.site:
            # The filter list is self.filters (sent as "fq" below). This line
            # used to append to self.fq, which this class never defines.
            self.filters.append(f"sites:{self.site}")

        the_facet_fields = []
        use_year_facet = False
        for field in self.facet_fields:
            if field == "firstLetter":
                the_facet_fields.append("{!ex=firstletter}firstNameFacetLetter")
            elif field == "author_facet":
                the_facet_fields.append("ar")
            else:
                the_facet_fields.append(field)

            if field == "year_facet":
                use_year_facet = True

        # 10/03/2023 - UNUSED
        if self.related_articles:
            params = {
                "q.op": "OR",
                "hl": "true",
                "hl.fl": "title_tex, trans_title_tex, trans_kwd, kwd",
                "hl.snippets": 1,
                "hl.fragsize": 0,
                "hl.simple.pre": "<strong>",
                "hl.simple.post": "</strong>",
                # "hl.method": "unified"
            }
        else:
            params = {
                "q.op": "AND",
                # 'fl': '*,score',  # for debugging
                # 'debugQuery': 'true',  # for debugging
                "facet.field": the_facet_fields,
                # ["{!ex=firstletter}firstNameFacetLetter", 'year_facet', 'collection_title_facet'],
                "facet.mincount": 1,
                "facet.limit": self.facet_limit,
                "facet.sort": "index",
            }

        if use_year_facet:
            # Decades are built manually because we allow the user to expand a
            # decade and see individual years
            params.update(
                {
                    "facet.range": "year_facet",
                    "f.year_facet.facet.range.start": 0,
                    "f.year_facet.facet.range.end": 3000,
                    "f.year_facet.facet.range.gap": 10,
                }
            )

        if self.sort:
            params["sort"] = self.sort

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        if self.fl:
            params["fl"] = self.fl

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)
        results = solr_results

        if self.create_facets:
            results = search_helpers.SearchInternalResults(
                solr_results, self.search_path, self.filters, self.facet_fields
            )

        return results

745 

746 

747##################################################################### 

748# 

749# solrGetDocumentByPidCmd: 

750# 

751# 

752###################################################################### 

753 

754 

class solrGetDocumentByPidCmd(solrCmd):
    """Fetch the first Solr document whose ``pid`` field matches the required ``pid`` parameter."""

    def __init__(self, params={}):
        self.pid = None

        super().__init__(params)

        self.required_params.append("pid")

    def internal_do(self):
        """Return the first matching document, or None when the search yields none."""
        super().internal_do()

        found = solrFactory.get_solr().search("pid:" + self.pid)
        if found is None:
            return None

        documents = found.docs
        return documents[0] if documents else None

778 

779 

class updateResourceSolrCmd(solrAddCmd):
    """Re-index a resource by merging ``params`` over its existing Solr document.

    The target resource is given with set_resource(), which provides the id and
    pid used by the inherited add logic.
    """

    def __init__(self, params=None):
        self.resource = None

        super().__init__(params)
        self.params = params

    def set_resource(self, resource):
        """Remember ``resource`` and take its id and pid."""
        self.resource = resource
        self.id = resource.id
        self.pid = resource.pid

    def pre_do(self):
        """Build the new document from the stored one, then run the inherited pre_do().

        When a document exists for the pid, ``params`` override its values, the
        ``_version_`` entry is dropped, and a ``contributors`` entry is expanded
        into the author fields and removed.
        """
        doc = solrGetDocumentByPidCmd({"pid": self.pid}).do()
        if doc:
            # params defaults to None; unpacking None with ** raises TypeError.
            overrides = self.params or {}
            self.data = {**doc, **overrides}
            if "_version_" in self.data:
                del self.data["_version_"]
            if "contributors" in self.data:
                solr_add_contributors_to_data(self.data["contributors"], self.data)
                self.data.pop("contributors")
        super().pre_do()

807 

808 

def research_more_like_this(article):
    """Query Solr's MoreLikeThis for documents similar to ``article``.

    Returns ``{"docs": []}`` when the article has no Solr document. Otherwise the
    result also holds ``params`` (the query parameters, sorted by key, including
    ``min_score``), ``numFound``, ``interestingTerms`` and ``explain``.
    Candidates are restricted by pid: all pids starting with "CR" when the
    article's pid does, else pids starting with the part before the first "_".
    """
    results = {"docs": []}
    doc = solrGetDocumentByPidCmd({"pid": article.pid}).do()
    if not doc:
        return results

    # fields = "au,kwd,trans_kwd,title_tex,trans_title_tex,abstract_tex,trans_abstract_tex,body"
    fields = getattr(settings, "MLT_FIELDS", "all")
    boost = getattr(settings, "MLT_BOOST", "true")
    default_min_score = 80 if boost == "true" else 40
    min_score = getattr(settings, "MLT_MIN_SCORE", default_min_score)

    params = {
        "debugQuery": "true",
        "mlt.interestingTerms": "details",
        "mlt.boost": boost,
        "fl": "*,score",
        "mlt.minwl": 4,
        "mlt.maxwl": 100,
        "mlt.mintf": 2,
        "mlt.mindf": 2,
        "mlt.maxdfpct": 1,
        "mlt.maxqt": 50,
    }
    # params.update({"mlt.qf": "trans_kwd^90 title_tex^80 body^1.7"})

    pid_prefix = article.pid.split("_")[0]
    if pid_prefix.startswith("CR"):
        # search suggested articles in all CR
        params["fq"] = r"pid:/CR.*/"
    else:
        params["fq"] = f"pid:/{pid_prefix}.*/"

    query = f'id:{doc["id"]}'
    similar = solrFactory.get_solr().more_like_this(q=query, mltfl=fields, **params)

    # The entries below are only reported back; they were not part of the call above.
    params["q"] = query
    params["mlt.fl"] = fields
    params["min_score"] = min_score

    results["params"] = dict(sorted(params.items()))
    results["docs"] = similar.docs
    results["numFound"] = similar.raw_response["response"]["numFound"]
    results["interestingTerms"] = similar.raw_response["interestingTerms"]
    results["explain"] = similar.debug["explain"]
    return results

842 

843 

def is_excluded_suggested_article(title):
    """Tell whether ``title`` is excluded from suggestions.

    True when the title starts with one of settings.MLT_EXCLUDED_TITLES_START or
    is one of settings.MLT_EXCLUDED_TITLES; both settings default to empty.
    """
    excluded_prefixes = tuple(getattr(settings, "MLT_EXCLUDED_TITLES_START", []))
    excluded_titles = getattr(settings, "MLT_EXCLUDED_TITLES", [])

    if title.startswith(excluded_prefixes):
        return True
    return title in excluded_titles

852 

853 

def auto_suggest_doi(suggest, article, results=None):
    """Fill ``suggest.doi_list`` with the DOIs of the best similar articles.

    ``results`` is computed with research_more_like_this(article) when not
    given (or falsy). When ``suggest.automatic_list`` is truthy, the first three
    documents are considered; a DOI is kept when the document's score exceeds
    ``results["params"]["min_score"]``, the DOI is not already listed and the
    title is not excluded. The DOIs are stored one per line.

    Returns the (possibly computed) ``results``.
    """
    if not results:
        results = research_more_like_this(article)

    if results and suggest.automatic_list:
        doi_list = []
        for item in results["docs"][:3]:
            # min_score is looked up per item on purpose: "params" is absent
            # from results when no document was found (and docs is then empty).
            if item["score"] > results["params"]["min_score"]:
                doi = item.get("doi", "")
                if not doi:
                    # A document without a DOI used to add an empty entry to the list.
                    continue
                title = item.get("title_tex", "")
                if doi not in doi_list and not is_excluded_suggested_article(title):
                    doi_list.append(doi)
        suggest.doi_list = "\n".join(doi_list)
    return results