Coverage for src/ptf/cmds/solr

1import pysolr

3from django.conf import settings

5from ptf.cmds.base_cmds import baseCmd

6from ptf.cmds.base_cmds import make_int

7from ptf.display import resolver

8from ptf.site_register import SITE_REGISTER

9from ptf.solr import search_helpers

10from ptf.utils import get_display_name

12# Not used so far.

13# nlm2solr use normalize-space for volume and volume-series,

14# but make_int is called to convert into int: spaces are also trimmed

15# def normalize_whitespace(str):

16# import re

17# str = str.strip()

18# str = re.sub(r'\s+', ' ', str)

19# return str

class solrFactory:
    """Holder of a lazily created, shared ``pysolr.Solr`` client.

    The client and its URL are stored as class attributes, so every caller
    gets the same client until :meth:`reset` is called.
    """

    # Shared client; created on first use by get_solr().
    solr = None
    # Solr endpoint; filled from settings.SOLR_URL when left unset.
    solr_url = None

    @staticmethod
    def get_solr():
        """Return the shared Solr client, creating it on first access."""
        if solrFactory.solr is not None:
            return solrFactory.solr

        if solrFactory.solr_url is None:
            solrFactory.solr_url = settings.SOLR_URL
        solrFactory.solr = pysolr.Solr(solrFactory.solr_url, timeout=10)
        return solrFactory.solr

    @staticmethod
    def do_solr_commit():
        """Commit on the shared client, unless settings.IGNORE_SOLR is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        solrFactory.get_solr().commit()

    @staticmethod
    def do_solr_rollback():
        """Send a rollback message, unless settings.IGNORE_SOLR is truthy."""
        if getattr(settings, "IGNORE_SOLR", False):
            return

        # The rollback is sent as a raw update message through the
        # client's _update() method.
        solrFactory.get_solr()._update("<rollback />")

    @staticmethod
    def reset():
        """Close the current client's session (if any) and drop the client."""
        client = solrFactory.solr
        if client:
            client.get_session().close()
        solrFactory.solr = None

def solr_add_contributors_to_data(contributors, data):
    """Fill the author-related entries of ``data`` from ``contributors``.

    Only contributors whose role is author, editor or translator are used.
    Nothing is written when ``contributors`` is None.

    Entries written to ``data``:
      * ``au``:  display names joined with "; "
      * ``ar``:  reference names (the ``mid`` when set, else the display name)
      * ``aul``: last names
      * ``fau``: first display name (only when there is at least one)
    """
    if contributors is None:
        return

    indexed_roles = ("author", "editor", "translator")
    display_names = []
    reference_names = []
    last_names = []

    for contributor in contributors:
        if contributor["role"] not in indexed_roles:
            continue

        shown = get_display_name(
            "",
            contributor["first_name"],
            contributor["last_name"],
            "",
            contributor["string_name"],
        )
        reference = contributor["mid"] or shown

        if reference:
            reference_names.append(reference)
        if shown:
            display_names.append(shown)
        if contributor["last_name"]:
            last_names.append(contributor["last_name"])

    data["au"] = "; ".join(display_names)
    # Names used for author references
    data["ar"] = reference_names
    # Surnames / last names
    data["aul"] = last_names

    if display_names:
        data["fau"] = display_names[0]

def solr_add_kwds_to_data(kwds, data):
    """Split ``kwds`` into the ``kwd``, ``trans_kwd`` and ``msc`` entries of ``data``.

    * ``msc``: upper-cased values of the keywords whose type is "msc"
    * ``kwd``: values of the other keywords in French, joined with ", "
    * ``trans_kwd``: values of the other keywords in any other language,
      joined with ", "
    """
    french_values = []
    translated_values = []
    msc_codes = []

    for keyword in kwds:
        if keyword["type"] == "msc":
            # "lang" is never read for MSC keywords
            msc_codes.append(keyword["value"].upper())
        elif keyword["lang"] == "fr":
            french_values.append(keyword["value"])
        else:
            translated_values.append(keyword["value"])

    data["kwd"] = ", ".join(french_values)
    data["trans_kwd"] = ", ".join(translated_values)
    data["msc"] = msc_codes

98#####################################################################

99#

100# solrCmd: base class for Solr commands

101#

102######################################################################

class solrCmd(baseCmd):
    """Base class for Solr commands.

    ``do`` and ``undo`` return None without calling the base command when
    ``settings.IGNORE_SOLR`` is set to a truthy value.
    """

    # NOTE(review): ``params={}`` is a shared mutable default; it is only
    # safe as long as baseCmd does not mutate it - confirm in baseCmd.
    def __init__(self, params={}):
        super().__init__(params)

    def do(self, parent=None):
        """Run the command, or return None when Solr is ignored."""
        if getattr(settings, "IGNORE_SOLR", False):
            return None

        return super().do(parent)

    def post_do(self, resource=None):
        """Delegate to the base implementation."""
        super().post_do(resource)

    def undo(self):
        """Undo the command, or return None when Solr is ignored."""
        if getattr(settings, "IGNORE_SOLR", False):
            return None

        return super().undo()

121

122

123#####################################################################

124#

125# solrDeleteCmd: generic to delete Solr documents, based on a query

126#

127######################################################################

class solrDeleteCmd(solrCmd):
    """Delete every Solr document matching the query ``q``."""

    def __init__(self, params={}):
        # Defaults are assigned before the base constructor runs
        # (presumably so values given in params can replace them -
        # confirm in baseCmd).
        self.commit = True
        self.q = None

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self):
        """Issue the delete-by-query and return None."""
        super().internal_do()

        client = solrFactory.get_solr()
        client.delete(q=self.q, commit=self.commit)

        return None

143

144

145#####################################################################

146#

147# solrAddCmd: base class for Solr Add commands

148#

149######################################################################

class solrAddCmd(solrCmd):
    """Base class for commands that add one document (``self.data``) to Solr."""

    def __init__(self, params={}):
        self.commit = True
        self.db_obj = None
        self.id = None
        self.pid = None
        self.data = {}

        super().__init__(params)

        self.required_params.extend(["id", "pid"])
        self.required_delete_params.extend(["id"])

    def pre_do(self):
        """Store id/pid in the document and remove existing documents for the pid."""
        super().pre_do()

        self.data["id"] = self.id
        self.data["pid"] = self.pid

        # Errors and/or simultaneous uploads sometimes leave several records
        # for one PID. To avoid several search results for a single PID,
        # everything matching the PID is deleted before internal_do runs.
        delete_cmd = solrDeleteCmd({"q": "pid:" + self.pid})
        delete_cmd.do()

    def internal_do(self):
        """Send ``self.data`` to Solr as a single document and return None."""
        super().internal_do()

        solrFactory.get_solr().add(docs=[self.data], commit=self.commit)

        return None

    def internal_undo(self):
        """Delete the document by id; return what the base ``internal_undo`` returned."""
        base_result = super().internal_undo()

        solrFactory.get_solr().delete(id=self.id, commit=self.commit)

        return base_result

188

189

190#####################################################################

191#

192# addResourceSolrCmd: base class for solrAddCmds adding a Resource

193#

194######################################################################

class addResourceSolrCmd(solrAddCmd):
    """Base class for the commands that index a resource described by ``xobj``.

    ``pre_do`` copies the attributes named in ``self.fields`` from ``xobj``
    into ``self.data``, then adds abstracts, keywords, contributors, the year
    facet, links to binary files, the wall and the sites.
    """

    def __init__(self, params={}):
        self.xobj = None  # model_data object

        # Attributes of xobj that are copied as-is into the Solr document
        self.fields = [
            "lang",
            "doi",
            "title_tex",
            "title_html",
            "trans_title_tex",
            "trans_title_html",
            "abstract_tex",
            "abstract_html",
            "trans_abstract_tex",
            "trans_abstract_html",
            "collection_title_tex",
            "collection_title_html",
            "collection_id",
            "year",
            "body",
            "bibitem",
        ]

        # Used to filter the articles based on their site
        self.sites = None

        super().__init__(params)

        self.required_params.extend(["xobj"])

    def add_collection(self, collection):
        """Record the collection id and titles, and derive ``dt``/``classname``."""
        self.data["collection_id"] = collection.id

        # Titles are accumulated in lists, one per flavour (tex, html)
        for flavour in ("tex", "html"):
            key = "collection_title_" + flavour
            title = getattr(collection, "title_" + flavour)
            if key in self.data:
                self.data[key].append(title)
            else:
                self.data[key] = [title]

        # classname is used only by PCJ for the article types
        coltype = collection.coltype
        if coltype == "journal":
            self.data["dt"] = ["Article de revue"]
        elif coltype == "acta":
            self.data["dt"] = ["Acte de séminaire"]
        else:
            label_by_coltype = {
                "thesis": "Thèse",
                "lecture-notes": "Notes de cours",
                "proceeding": "Acte de rencontre",
            }
            # Any other collection type is indexed as a book ("Livre")
            label = label_by_coltype.get(coltype, "Livre")
            self.data["classname"] = label
            self.data["dt"] = [label]

    def add_abstracts_to_data(self):
        """Copy each abstract of ``xobj`` into the (trans_)abstract_tex/html entries.

        French abstracts go to ``abstract_*``; any other language goes to
        ``trans_abstract_*``. A later abstract overwrites an earlier one that
        targets the same entry.
        """
        for abstract in self.xobj.abstracts:
            prefix = "" if abstract["lang"] == "fr" else "trans_"

            for flavour in ("tex", "html"):
                self.data[prefix + "abstract_" + flavour] = abstract["value_" + flavour]

    def add_year_to_data(self, year):
        """Set ``year_facet`` from ``year``; for "A-B" the part after the first dash is used."""
        if not year:
            return

        parts = str(year).split("-")
        self.data["year_facet"] = int(parts[1]) if len(parts) > 1 else int(year)

    def pre_do(self):
        """Build the Solr document from ``xobj`` (and ``db_obj`` when available).

        Raises:
            ValueError: when no ``dt`` entry has been set in ``self.data``.
        """
        super().pre_do()

        xobj = self.xobj
        for name in self.fields:
            if hasattr(xobj, name):
                self.data[name] = getattr(xobj, name)

        self.add_abstracts_to_data()
        solr_add_kwds_to_data(xobj.kwds, self.data)
        solr_add_contributors_to_data(xobj.contributors, self.data)

        if "dt" not in self.data:
            raise ValueError(f"add SolR resource without dt - {self.xobj.pid}")

        # year either comes directly from xobj (container) or from set_container
        self.add_year_to_data(self.data["year"])

        db_obj = self.db_obj
        if db_obj is not None:
            # Solr entry that receives the link, for each indexed mimetype
            solr_field_by_mimetype = {
                "application/pdf": "pdf",
                "image/x.djvu": "djvu",
                "application/x-tex": "tex",
            }
            for stream in xobj.streams:
                mimetype = stream["mimetype"]
                solr_field = solr_field_by_mimetype.get(mimetype)
                if solr_field is None:
                    continue
                self.data[solr_field] = db_obj.get_binary_file_href_full_path(
                    "self", mimetype, stream["location"]
                )

            self.data["wall"] = db_obj.get_wall()

        # Fall back to the current site when no explicit sites were given
        self.data["sites"] = self.sites if self.sites else [settings.SITE_ID]

315

316

317#####################################################################

318#

319# addContainerSolrCmd: adds/remove a container (issue/book)

320#

321# A container needs a collection (collection_title_tex etc.)

322#

323######################################################################

class addContainerSolrCmd(addResourceSolrCmd):
    """Add/remove a container (issue/book) to/from Solr.

    A container needs a collection (collection_title_tex etc.).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(["ctype"])

    def pre_do(self):
        """Complete the document with the volume, number and series as integers."""
        super().pre_do()

        # Each attribute is stored under its own key. The previous code wrote
        # all three values to "volume", so "number" and "vseries" were never
        # set and "volume" ended up holding the last attribute found.
        for field in ("volume", "number", "vseries"):
            if hasattr(self.xobj, field):
                self.data[field] = make_int(getattr(self.xobj, field))

        # When the container is listed in a collection, the values of the
        # first "incollection" entry replace the ones set above.
        if hasattr(self.xobj, "incollection") and len(self.xobj.incollection) > 0:
            incol = self.xobj.incollection[0]
            self.data["vseries"] = make_int(incol.vseries)
            self.data["volume"] = 0
            self.data["number"] = make_int(incol.volume)

346

347

348#####################################################################

349#

350# addArticleSolrCmd: adds/remove an article

351#

352# an article needs a container (container_id) that needs a collection (collection_id)

353#

354######################################################################

355

356

class addArticleSolrCmd(addResourceSolrCmd):
    """Add/remove an article to/from Solr.

    An article needs a container (container_id) that needs a collection
    (collection_id).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_id", "volume", "number", "vseries", "article_number"]
        )

    def set_container(self, container):
        """Copy the id, year and integer series/volume/number of the container."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["vseries"] = make_int(container.vseries)
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)

    def set_eprint(self, eprint):
        """Tag the document as an e-print (``dt`` must already be set)."""
        self.data["dt"].append("e-print")

    def set_source(self, source):
        """No-op."""
        pass

    def set_thesis(self, thesis):
        """Tag the document as a thesis (``dt`` must already be set)."""
        self.data["dt"].append("thesis")

    def set_original_article(self, article):
        """No-op for now."""
        # TODO Replace some data (ie doi, pid) with the original article
        pass

    def pre_do(self):
        """Complete the document with the article class name and page range."""
        super().pre_do()

        self.data["classname"] = resolver.ARTICLE_TYPES.get(
            self.xobj.atype, "Article de recherche"
        )

        page_range = self.xobj.page_range
        if not page_range:
            # Build "p. <fpage>-<lpage>" from whichever page is available.
            # When neither page is set the result is "p. " (unchanged).
            first_page = self.xobj.fpage
            last_page = self.xobj.lpage
            text = "p. "
            if first_page is not None:
                text += first_page
            if first_page and last_page:
                text += "-"
            if last_page is not None:
                text += last_page
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # Already prefixed: keep the value. The previous code reset the
            # entry to "" in this case, so the page range was lost.
            text = page_range

        self.data["page_range"] = text

404

405

406#####################################################################

407#

# addBookPartSolrCmd: adds/removes a book part (similar to an article)

409#

410# a book part needs a collection id (array)

411#

412######################################################################

class addBookPartSolrCmd(addResourceSolrCmd):
    """Add/remove a book part (similar to an article) to/from Solr.

    A book part needs a collection id (array).
    """

    def __init__(self, params={}):
        super().__init__(params)

        self.fields.extend(
            ["page_range", "container_title_tex", "container_title_html", "volume", "number"]
        )

    def set_container(self, container):
        """Copy the id, year, integer volume/number and titles of the container."""
        self.data["container_id"] = container.id
        self.data["year"] = container.year
        self.data["volume"] = make_int(container.volume)
        self.data["number"] = make_int(container.number)
        self.data["container_title_tex"] = container.title_tex
        self.data["container_title_html"] = container.title_html

    def pre_do(self):
        """Complete the document with the class name and page range."""
        super().pre_do()

        self.data["classname"] = "Chapitre de livre"

        page_range = self.xobj.page_range
        if not page_range:
            # Build "p. <fpage>-<lpage>" from whichever page is available.
            # When neither page is set the result is "p. " (unchanged).
            first_page = self.xobj.fpage
            last_page = self.xobj.lpage
            text = "p. "
            if first_page is not None:
                text += first_page
            if first_page and last_page:
                text += "-"
            if last_page is not None:
                text += last_page
        elif page_range[0] != "p":
            text = "p. " + page_range
        else:
            # Already prefixed: keep the value. The previous code reset the
            # entry to "" in this case, so the page range was lost.
            text = page_range

        self.data["page_range"] = text

446

447

448#####################################################################

449#

450# solrSearchCmd:

451#

452# called from ptf/views.py; SolrRequest(request, q, alias=alias,

453# site=site,

454# default={'sort': '-score'})

455#

456# Warning: As of July 2018, only 1 site id is stored in a SolR document

457# Although the SolR schema is already OK to store multiple sites ("sites" is an array)

458# no Solr commands have been written to add/remove sites

459# We only have add commands.

460# Search only works if the Solr instance is meant for individual or ALL sites

461#

462######################################################################

class solrSearchCmd(solrCmd):
    """Faceted full-text search built from a list of criteria (``qs``).

    Each item of ``qs`` is a mapping with the keys ``name``, ``value``,
    ``not``, ``first`` and ``last``. ``internal_do`` turns them into a Solr
    query string, runs an edismax search with highlighting and facets, and
    wraps the response in ``search_helpers.SearchResults``.
    """

    # Solr field prefix added by get_q for the criteria that target one field
    _FIELD_PREFIXES = {
        "author": "au:",
        "author_ref": "ar:",
        "title": "title_tex:",
        "body": "body:",
        "references": "bibitem:",
        "abstract": "trans_abstract_tex:",
    }

    def __init__(self, params={}):
        self.q = ""
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = "-score"  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""

        super().__init__(params)

        self.required_params.extend(["qs"])

    def get_q(self, name, value, exclude, first, last):
        """Return the Solr query fragment for one search criterion.

        Args:
            name: criterion name ("all", "date", "author", "author_ref",
                "title", "body", "references", "abstract", "kwd", ...).
            value: searched text; "*" means "anything".
            exclude: when truthy, the fragment is negated with a leading "-".
            first, last: bounds of the year range, used only for "date".
        """
        if name == "all" and value == "*":
            return "*:*"

        if value == "*":
            value = ""

        negation = "-" if exclude else ""

        if name == "date":
            return negation + "year:[" + first + " TO " + last + "]"

        prefix = self._FIELD_PREFIXES.get(name, "")

        if len(value) > 0 and value[0] == '"' and value[-1] == '"':
            # A quoted phrase is passed through unchanged
            clause = value
        else:
            all_terms = " AND ".join(value.split())
            if name == "kwd":
                clause = "(kwd:(" + all_terms + ") OR trans_kwd:(" + all_terms + "))"
            else:
                clause = "(" + all_terms + ")"

        return negation + prefix + clause

    def internal_do(self) -> search_helpers.SearchResults:
        """Build the query, the filters and the parameters, then run the search."""
        super().internal_do()

        # Restrict the search to the relevant site(s)
        if settings.COLLECTION_PID == "CR":
            cr_ids = ["CRMATH", "CRMECA", "CRPHYS", "CRCHIM", "CRGEOS", "CRBIOL"]
            ids = [SITE_REGISTER[item.lower()]["site_id"] for item in cr_ids]
            self.filters.append(f"sites:[{min(ids)} TO {max(ids)}]")
        elif settings.COLLECTION_PID != "ALL":
            self.filters.append(f"sites:{settings.SITE_ID}")

        # "-field" sorts descending, "field" ascending; "year desc" is
        # always appended as the last criterion.
        sort = "score desc"
        if self.sort:
            sorts = []
            for spec in self.sort.split(","):
                spec = spec.strip()
                if not spec:
                    # A blank criterion (e.g. a trailing comma) used to
                    # raise IndexError on spec[0]; it is now ignored.
                    continue
                if spec[0] == "-":
                    sorts.append(f"{spec[1:]} desc")
                else:
                    sorts.append(f"{spec} asc")
            sorts.append("year desc")
            sort = ", ".join(sorts)

        # The "ar" facet is dropped when the search already targets author_ref
        use_ar_facet = True
        qt = []
        fragments = []
        for qi in self.qs or ():
            qt.append(qi["name"])
            if qi["name"] == "author_ref":
                use_ar_facet = False
            if qi["value"] or qi["first"]:
                fragments.append(
                    self.get_q(qi["name"], qi["value"], qi["not"], qi["first"], qi["last"])
                )
        q = "".join(fragment + " " for fragment in fragments)
        if q:
            self.q = q

        facet_fields = ["collection_title_facet", "msc_facet", "dt", "year_facet"]

        if use_ar_facet:
            facet_fields.append("ar")

        if settings.COLLECTION_PID == "CR":
            facet_fields.append("sites")
        elif settings.COLLECTION_PID == "PCJ":
            facet_fields.append("classname")

        params = {
            "q.op": "AND",
            "sort": sort,
            "facet.field": facet_fields,
            # Decades are built manually because we allow the user to
            # expand a decade and see individual years
            "facet.range": "year_facet",
            "f.year_facet.facet.range.start": 0,
            "f.year_facet.facet.range.end": 3000,
            "f.year_facet.facet.range.gap": 10,
            "facet.mincount": 1,
            "facet.limit": 100,
            "facet.sort": "count",
            # 'fl': '*,score',  # for debugging
            # 'debugQuery': 'true',  # for debugging
            "hl": "true",
            # 'hl.fl': "*", -> by default, returns the fields of qf
            "hl.snippets": 1,
            "hl.fragsize": 300,
            "hl.simple.pre": "<strong>",
            "hl.simple.post": "</strong>",
            "defType": "edismax",
            # Without "tie" the score is the max of the per-field scores;
            # here 0.1 x the score of the other fields is added.
            "tie": 0.1,
            # "df": 'text',  Not used with dismax queries
            # We want to retrieve the highlights in both _tex and _html.
            # We need to specify the 2 in qf
            "qf": [
                "au^21",
                "title_tex^13",
                "title_html^13",
                "trans_title_tex^13",
                "trans_title_html^13",
                "abstract_tex^8",
                "trans_abstract_tex^8",
                "kwd^5",
                "trans_kwd^5",
                "collection_title_html^3",
                "collection_title_tex^3",
                "body^2",
                "bibitem",
            ],
            # The "ar" field is multi-valued and dedicated to facets;
            # the "au" field is used for searching and for displaying
            # the results.
        }

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)

        return search_helpers.SearchResults(
            solr_results, self.search_path, self.filters, qt, use_ar_facet
        )

635

636

637#####################################################################

638#

639# solrInternalSearchCmd:

640#

641# called from ptf/views.py/book by author

642#

643######################################################################

class solrInternalSearchCmd(solrCmd):
    """Search with a ready-made query ``q`` and configurable facets.

    ``internal_do`` returns a ``search_helpers.SearchInternalResults`` when
    ``create_facets`` is truthy, otherwise the raw pysolr results.
    """

    def __init__(self, params={}):
        self.q = "*:*"
        self.qs = None
        self.filters = []  # TODO: implicit filters
        self.start = None
        self.rows = None
        self.sort = None  # '-score'  # use ',' to specify multiple criteria
        self.site = None
        self.search_path = ""
        self.facet_fields = []
        self.facet_limit = 100
        self.fl = None
        self.create_facets = True
        # 10/03/2023 - UNUSED
        self.related_articles = False

        super().__init__(params)

        self.required_params.extend(["q"])

    def internal_do(self) -> search_helpers.SearchInternalResults | pysolr.Results:
        """Run the search with the configured facets, filters and paging."""
        super().internal_do()

        # 10/03/2023 - UNUSED
        if self.site:
            # The site restriction is a filter query. The previous code
            # appended to ``self.fq``, which this class never defines.
            self.filters.append(f"sites:{self.site}")

        # Translate the public facet names into Solr field names
        the_facet_fields = []
        use_year_facet = False
        for field in self.facet_fields:
            if field == "firstLetter":
                the_facet_fields.append("{!ex=firstletter}firstNameFacetLetter")
            elif field == "author_facet":
                the_facet_fields.append("ar")
            else:
                the_facet_fields.append(field)

            if field == "year_facet":
                use_year_facet = True

        # 10/03/2023 - UNUSED
        if self.related_articles:
            params = {
                "q.op": "OR",
                "hl": "true",
                "hl.fl": "title_tex, trans_title_tex, trans_kwd, kwd",
                "hl.snippets": 1,
                "hl.fragsize": 0,
                "hl.simple.pre": "<strong>",
                "hl.simple.post": "</strong>",
                # "hl.method": "unified"
            }
        else:
            params = {
                "q.op": "AND",
                # 'fl': '*,score',  # for debugging
                # 'debugQuery': 'true',  # for debugging
                "facet.field": the_facet_fields,
                "facet.mincount": 1,
                "facet.limit": self.facet_limit,
                "facet.sort": "index",
            }

        if use_year_facet:
            # Decades are built manually because we allow the user to expand a
            # decade and see individual years
            params.update(
                {
                    "facet.range": "year_facet",
                    "f.year_facet.facet.range.start": 0,
                    "f.year_facet.facet.range.end": 3000,
                    "f.year_facet.facet.range.gap": 10,
                }
            )

        if self.sort:
            params["sort"] = self.sort

        if self.start:
            params["start"] = self.start

        if self.rows:
            params["rows"] = self.rows

        if self.filters:
            params["fq"] = self.filters

        if self.fl:
            params["fl"] = self.fl

        solr_results = solrFactory.get_solr().search(self.q, facet="true", **params)
        results = solr_results

        if self.create_facets:
            results = search_helpers.SearchInternalResults(
                solr_results, self.search_path, self.filters, self.facet_fields
            )

        return results

745

746

747#####################################################################

748#

749# solrGetDocumentByPidCmd:

750#

751#

752######################################################################

753

754

class solrGetDocumentByPidCmd(solrCmd):
    """Fetch the first Solr document whose ``pid`` matches ``self.pid``."""

    def __init__(self, params={}):
        self.pid = None

        super().__init__(params)

        self.required_params.extend(["pid"])

    def internal_do(self):
        """Return the first matching document, or None when there is none."""
        super().internal_do()

        response = solrFactory.get_solr().search("pid:" + self.pid)
        if response is None:
            return None

        documents = response.docs
        return documents[0] if documents else None

778

779

class updateResourceSolrCmd(solrAddCmd):
    """Re-index a resource by merging new values into its existing Solr document.

    ``pre_do`` loads the document currently stored for ``self.pid`` and
    overlays the entries of ``params`` on top of it. A ``contributors`` entry
    is expanded into the author entries and then removed.
    """

    def __init__(self, params=None):
        self.resource = None

        # Normalise once: pre_do unpacks self.params with ``**``, which used
        # to raise TypeError when the default ``None`` was kept.
        params = {} if params is None else params

        super().__init__(params)
        self.params = params

    def set_resource(self, resource):
        """Target ``resource``: remember it and take over its id and pid."""
        self.resource = resource
        self.id = resource.id
        self.pid = resource.pid

    def pre_do(self):
        """Build ``self.data`` from the stored document overlaid with the params."""
        doc = solrGetDocumentByPidCmd({"pid": self.pid}).do()
        if doc:
            self.data = {**doc, **self.params}
            # "_version_" comes back with the stored document; it is removed
            # before the document is sent again.
            self.data.pop("_version_", None)
            if "contributors" in self.data:
                solr_add_contributors_to_data(self.data["contributors"], self.data)
                self.data.pop("contributors")
        super().pre_do()

807

808

def research_more_like_this(article):
    """Query Solr's MoreLikeThis for documents similar to ``article``.

    Returns a dict that always has a ``docs`` list. When the article is
    found in Solr it also holds ``params`` (the query parameters sorted by
    name, including ``min_score``), ``numFound``, ``interestingTerms`` and
    ``explain``.

    The fields, boost and minimum score come from the optional settings
    ``MLT_FIELDS``, ``MLT_BOOST`` and ``MLT_MIN_SCORE``.
    """
    results = {"docs": []}

    doc = solrGetDocumentByPidCmd({"pid": article.pid}).do()
    if not doc:
        return results

    fields = getattr(settings, "MLT_FIELDS", "all")
    boost = getattr(settings, "MLT_BOOST", "true")
    default_min_score = 80 if boost == "true" else 40
    min_score = getattr(settings, "MLT_MIN_SCORE", default_min_score)

    # Suggestions are limited to pids sharing the article's prefix (the
    # part before the first "_"); every "CR..." prefix is searched together.
    prefix = article.pid.split("_")[0]
    if prefix[:2] == "CR":
        pid_filter = r"pid:/CR.*/"
    else:
        pid_filter = f"pid:/{prefix}.*/"

    params = {
        "debugQuery": "true",
        "mlt.interestingTerms": "details",
        "mlt.boost": boost,
        "fl": "*,score",
        "mlt.minwl": 4,
        "mlt.maxwl": 100,
        "mlt.mintf": 2,
        "mlt.mindf": 2,
        "mlt.maxdfpct": 1,
        "mlt.maxqt": 50,
        "fq": pid_filter,
    }

    doc_query = f'id:{doc["id"]}'
    similar = solrFactory.get_solr().more_like_this(q=doc_query, mltfl=fields, **params)

    # The values below are added only for the returned report; they are not
    # part of the keyword arguments passed to more_like_this above.
    params["q"] = doc_query
    params["mlt.fl"] = fields
    params["min_score"] = min_score

    results["params"] = dict(sorted(params.items()))
    results["docs"] = similar.docs
    results["numFound"] = similar.raw_response["response"]["numFound"]
    results["interestingTerms"] = similar.raw_response["interestingTerms"]
    results["explain"] = similar.debug["explain"]
    return results

842

843

def is_excluded_suggested_article(title):
    """Tell whether ``title`` must be left out of the suggested articles.

    A title is excluded when it starts with one of the optional
    ``settings.MLT_EXCLUDED_TITLES_START`` prefixes or is listed in the
    optional ``settings.MLT_EXCLUDED_TITLES``.
    """
    excluded_titles = getattr(settings, "MLT_EXCLUDED_TITLES", [])
    excluded_prefixes = getattr(settings, "MLT_EXCLUDED_TITLES_START", [])

    if title.startswith(tuple(excluded_prefixes)):
        return True
    return title in excluded_titles

852

853

def auto_suggest_doi(suggest, article, results=None):
    """Fill ``suggest.doi_list`` from the "more like this" results.

    ``results`` is computed with research_more_like_this(article) when it is
    not supplied (or is empty). When ``suggest.automatic_list`` is truthy,
    the DOIs of the first three documents whose score exceeds the
    ``min_score`` of the results are kept (without duplicates or excluded
    titles) and stored, one per line, in ``suggest.doi_list``.

    Returns the results used.
    """
    if not results:
        results = research_more_like_this(article)

    if not (results and suggest.automatic_list):
        return results

    selected_dois = []
    for candidate in results["docs"][:3]:
        if not candidate["score"] > results["params"]["min_score"]:
            continue

        doi = candidate.get("doi", "")
        title = candidate.get("title_tex", "")
        if doi in selected_dois:
            continue
        if is_excluded_suggested_article(title):
            continue
        selected_dois.append(doi)

    suggest.doi_list = "\n".join(selected_dois)
    return results

Coverage for src/ptf/cmds/solr_cmds.py: 84%

468 statements