brotherton-erpnext/erpnext/e_commerce/legacy_search.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

128 lines
3.4 KiB
Python
Raw Normal View History

2020-11-19 06:35:56 +00:00
import frappe
from frappe.search.full_text_search import FullTextSearch
from frappe.utils import strip_html_tags
from whoosh.analysis import StemmingAnalyzer
from whoosh.fields import ID, KEYWORD, TEXT, Schema
from whoosh.qparser import FieldsPlugin, MultifieldParser, WildcardPlugin
from whoosh.query import Prefix
# TODO: Make obsolete
2020-11-19 06:35:56 +00:00
INDEX_NAME = "products"
class ProductSearch(FullTextSearch):
""" Wrapper for WebsiteSearch """
def get_schema(self):
return Schema(
title=TEXT(stored=True, field_boost=1.5),
name=ID(stored=True),
path=ID(stored=True),
content=TEXT(stored=True, analyzer=StemmingAnalyzer()),
keywords=KEYWORD(stored=True, scorable=True, commas=True),
)
def get_id(self):
return "name"
def get_items_to_index(self):
"""Get all routes to be indexed, this includes the static pages
in www/ and routes from published documents
Returns:
self (object): FullTextSearch Instance
"""
items = get_all_published_items()
documents = [self.get_document_to_index(item) for item in items]
return documents
def get_document_to_index(self, item):
try:
item = frappe.get_doc("Item", item)
title = item.item_name
keywords = [item.item_group]
if item.brand:
keywords.append(item.brand)
if item.website_image_alt:
keywords.append(item.website_image_alt)
if item.has_variants and item.variant_based_on == "Item Attribute":
keywords = keywords + [attr.attribute for attr in item.attributes]
if item.web_long_description:
content = strip_html_tags(item.web_long_description)
2021-02-01 14:20:27 +00:00
elif item.description:
2020-11-19 06:35:56 +00:00
content = strip_html_tags(item.description)
return frappe._dict(
title=title,
name=item.name,
path=item.route,
content=content,
keywords=", ".join(keywords),
)
except Exception:
pass
def search(self, text, scope=None, limit=20):
"""Search from the current index
Args:
text (str): String to search for
scope (str, optional): Scope to limit the search. Defaults to None.
limit (int, optional): Limit number of search results. Defaults to 20.
Returns:
[List(_dict)]: Search results
"""
ix = self.get_index()
results = None
out = []
with ix.searcher() as searcher:
parser = MultifieldParser(["title", "content", "keywords"], ix.schema)
parser.remove_plugin_class(FieldsPlugin)
parser.remove_plugin_class(WildcardPlugin)
query = parser.parse(text)
filter_scoped = None
if scope:
filter_scoped = Prefix(self.id, scope)
results = searcher.search(query, limit=limit, filter=filter_scoped)
for r in results:
out.append(self.parse_result(r))
return out
def parse_result(self, result):
title_highlights = result.highlights("title")
content_highlights = result.highlights("content")
keyword_highlights = result.highlights("keywords")
return frappe._dict(
title=result["title"],
path=result["path"],
keywords=result["keywords"],
title_highlights=title_highlights,
content_highlights=content_highlights,
keyword_highlights=keyword_highlights,
)
def get_all_published_items():
return frappe.get_all("Website Item", filters={"variant_of": "", "published": 1}, pluck="item_code")
2020-11-19 06:35:56 +00:00
def update_index_for_path(path):
search = ProductSearch(INDEX_NAME)
return search.update_index_by_name(path)
def remove_document_from_index(path):
search = ProductSearch(INDEX_NAME)
return search.remove_document_from_index(path)
def build_index_for_all_routes():
search = ProductSearch(INDEX_NAME)
return search.build()