135 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			135 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import frappe
 | |
| from frappe.search.full_text_search import FullTextSearch
 | |
| from frappe.utils import strip_html_tags
 | |
| from whoosh.analysis import StemmingAnalyzer
 | |
| from whoosh.fields import ID, KEYWORD, TEXT, Schema
 | |
| from whoosh.qparser import FieldsPlugin, MultifieldParser, WildcardPlugin
 | |
| from whoosh.query import Prefix
 | |
| 
 | |
# TODO: Make obsolete
# Index name handed to ProductSearch by the module-level helper functions.
INDEX_NAME = "products"
 | |
| 
 | |
| 
 | |
class ProductSearch(FullTextSearch):
	"""Full-text search over published items, implemented on top of FullTextSearch."""

	def get_schema(self):
		"""Return the Whoosh schema describing one indexed product.

		Every field is stored. `title` carries a 1.5 field boost, `content`
		is analyzed with a stemming analyzer and `keywords` is a scorable,
		comma-separated keyword list.
		"""
		return Schema(
			title=TEXT(stored=True, field_boost=1.5),
			name=ID(stored=True),
			path=ID(stored=True),
			content=TEXT(stored=True, analyzer=StemmingAnalyzer()),
			keywords=KEYWORD(stored=True, scorable=True, commas=True),
		)

	def get_id(self):
		"""Return the name of the schema field that identifies a document."""
		return "name"

	def get_items_to_index(self):
		"""Build the index documents for all published items.

		Returns:
			list[frappe._dict]: One document per published item. Items whose
			document could not be built are left out.
		"""
		documents = (self.get_document_to_index(item) for item in get_all_published_items())
		# get_document_to_index returns None on failure; drop those entries.
		return [document for document in documents if document]

	def get_document_to_index(self, item):
		"""Build the index document for a single item.

		Args:
			item (str): Name of the "Item" document to load.

		Returns:
			frappe._dict | None: Document with `title`, `name`, `path`,
			`content` and `keywords`, or None if building it failed.
		"""
		try:
			doc = frappe.get_doc("Item", item)
			keywords = [doc.item_group, doc.brand, doc.website_image_alt]

			if doc.has_variants and doc.variant_based_on == "Item Attribute":
				keywords.extend(attr.attribute for attr in doc.attributes)

			# Prefer the long web description, then the plain description.
			# Default to an empty string so items without any description are
			# still indexed by title and keywords instead of being dropped.
			raw_content = doc.web_long_description or doc.description
			content = strip_html_tags(raw_content) if raw_content else ""

			return frappe._dict(
				title=doc.item_name,
				name=doc.name,
				path=doc.route,
				content=content,
				# Skip unset values: joining None would raise a TypeError.
				keywords=", ".join(keyword for keyword in keywords if keyword),
			)
		except Exception:
			# Indexing is best-effort: record the failure and skip this item
			# rather than aborting the caller.
			frappe.log_error(
				message=f"Item: {item}\n{frappe.get_traceback()}",
				title="Product search indexing failed",
			)
			return None

	def search(self, text, scope=None, limit=20):
		"""Search the current index.

		Args:
			text (str): String to search for in title, content and keywords.
			scope (str, optional): Prefix that the `self.id` field of a hit
				must start with. Defaults to None (no restriction).
			limit (int, optional): Maximum number of results. Defaults to 20.

		Returns:
			list[frappe._dict]: Parsed search results.
		"""
		ix = self.get_index()

		with ix.searcher() as searcher:
			parser = MultifieldParser(["title", "content", "keywords"], ix.schema)
			# Turn off "field:term" and wildcard syntax in the query text.
			parser.remove_plugin_class(FieldsPlugin)
			parser.remove_plugin_class(WildcardPlugin)
			query = parser.parse(text)

			scope_filter = Prefix(self.id, scope) if scope else None
			results = searcher.search(query, limit=limit, filter=scope_filter)

			# Hits are parsed before the searcher is closed.
			return [self.parse_result(result) for result in results]

	def parse_result(self, result):
		"""Convert a search hit into a dict of stored values and highlights.

		Args:
			result: A single hit from the search results.

		Returns:
			frappe._dict: `title`, `path` and `keywords` of the hit plus the
			highlighted snippets for each of the three searched fields.
		"""
		return frappe._dict(
			title=result["title"],
			path=result["path"],
			keywords=result["keywords"],
			title_highlights=result.highlights("title"),
			content_highlights=result.highlights("content"),
			keyword_highlights=result.highlights("keywords"),
		)
 | |
| 
 | |
| 
 | |
def get_all_published_items():
	"""Return the `item_code` of every published Website Item whose `variant_of` is empty."""
	published_non_variants = {"variant_of": "", "published": 1}
	return frappe.get_all("Website Item", filters=published_non_variants, pluck="item_code")
 | |
| 
 | |
| 
 | |
def update_index_for_path(path):
	"""Update the INDEX_NAME index entry for `path` and return the result.

	NOTE(review): `path` is forwarded to `update_index_by_name`, so callers
	presumably pass a document name rather than a route — confirm.
	"""
	return ProductSearch(INDEX_NAME).update_index_by_name(path)
 | |
| 
 | |
| 
 | |
def remove_document_from_index(path):
	"""Remove the entry for `path` from the INDEX_NAME index and return the result."""
	return ProductSearch(INDEX_NAME).remove_document_from_index(path)
 | |
| 
 | |
| 
 | |
def build_index_for_all_routes():
	"""Run `ProductSearch.build()` for the INDEX_NAME index and return its result."""
	return ProductSearch(INDEX_NAME).build()
 |