From 4364fb9628854a27942304e55c26f62d916cd46c Mon Sep 17 00:00:00 2001 From: marination Date: Wed, 17 May 2023 19:45:03 +0530 Subject: [PATCH] feat: Optional Fuzzy Matching & Skip Matches for multiple similar matches - Fuzzy matching can be enabled optionally in the settings - If a query gets multiple matches with the same score, do not set a party as it is an extremely close call - misc: Add 'cancelled' status to Bank transaction - Test for skipping matching with extremely close matches --- .../accounts_settings/accounts_settings.json | 13 +++- .../bank_transaction/auto_match_party.py | 77 ++++++++++++++----- .../bank_transaction/bank_transaction.json | 4 +- .../bank_transaction/test_auto_match_party.py | 18 +++++ 4 files changed, 88 insertions(+), 24 deletions(-) diff --git a/erpnext/accounts/doctype/accounts_settings/accounts_settings.json b/erpnext/accounts/doctype/accounts_settings/accounts_settings.json index ff07de3467..05f1169d96 100644 --- a/erpnext/accounts/doctype/accounts_settings/accounts_settings.json +++ b/erpnext/accounts/doctype/accounts_settings/accounts_settings.json @@ -64,7 +64,8 @@ "tab_break_dpet", "show_balance_in_coa", "banking_tab", - "enable_party_matching" + "enable_party_matching", + "enable_fuzzy_matching" ], "fields": [ { @@ -404,6 +405,14 @@ "fieldname": "enable_party_matching", "fieldtype": "Check", "label": "Enable Automatic Party Matching" + }, + { + "default": "0", + "depends_on": "enable_party_matching", + "description": "Approximately match the description/party name against parties", + "fieldname": "enable_fuzzy_matching", + "fieldtype": "Check", + "label": "Enable Fuzzy Matching" } ], "icon": "icon-cog", @@ -411,7 +420,7 @@ "index_web_pages_for_search": 1, "issingle": 1, "links": [], - "modified": "2023-04-21 13:11:37.130743", + "modified": "2023-05-17 12:20:04.107641", "modified_by": "Administrator", "module": "Accounts", "name": "Accounts Settings", diff --git a/erpnext/accounts/doctype/bank_transaction/auto_match_party.py b/erpnext/accounts/doctype/bank_transaction/auto_match_party.py index 79d52c6565..753f0c1171 100644 --- a/erpnext/accounts/doctype/bank_transaction/auto_match_party.py +++ b/erpnext/accounts/doctype/bank_transaction/auto_match_party.py @@ -35,13 +35,15 @@ class AutoMatchParty: return self.__dict__.get(key, None) def match(self) -> Union[Tuple, None]: + result = None result = AutoMatchbyAccountIBAN( bank_party_account_number=self.bank_party_account_number, bank_party_iban=self.bank_party_iban, deposit=self.deposit, ).match() - if not result: + fuzzy_matching_enabled = frappe.db.get_single_value("Accounts Settings", "enable_fuzzy_matching") + if not result and fuzzy_matching_enabled: result = AutoMatchbyPartyDescription( bank_party_name=self.bank_party_name, description=self.description, deposit=self.deposit ).match() @@ -184,31 +186,66 @@ class AutoMatchbyPartyDescription: for party in parties: name_field = party.lower() + "_name" filters = {"status": "Active"} if party == "Employee" else {"disabled": 0} - names = frappe.get_all(party, filters=filters, pluck=name_field) for field in ["bank_party_name", "description"]: - if not result and self.get(field): - result = self.fuzzy_search_and_return_result(party, names, field) - if result: - break + if not self.get(field): + continue + + result, skip = self.fuzzy_search_and_return_result(party, names, field) + if result or skip: + break + + if result or skip: + # We skip if: + # If it was hard to distinguish between close matches and so match is None + # OR if the right match was found + break return result def fuzzy_search_and_return_result(self, party, names, field) -> Union[Tuple, None]: - result = process.extractOne(query=self.get(field), choices=names, scorer=fuzz.token_set_ratio) + skip = False - if result: - party_name, score, index = result - if score > 75: - # Dont set description as a key in Bank Party Mapper due to its volatility - mapper = {"bank_party_name": self.get(field)} if field == "bank_party_name" else None - return ( - party, - party_name, - mapper, - ) - else: - return None + result = process.extract(query=self.get(field), choices=names, scorer=fuzz.token_set_ratio) + party_name, skip = self.process_fuzzy_result(result) - return result + if not party_name: + return None, skip + + # Dont set description as a key in Bank Party Mapper due to its volatility + mapper = {"bank_party_name": self.get(field)} if field == "bank_party_name" else None + return ( + party, + party_name, + mapper, + ), skip + + def process_fuzzy_result(self, result: Union[list, None]): + """ + If there are multiple valid close matches return None as result may be faulty. + Return the result only if one accurate match stands out. + + Returns: Result, Skip (whether or not to continue matching) + """ + PARTY, SCORE, CUTOFF = 0, 1, 80 + + if not result or not len(result): + return None, False + + first_result = result[0] + + if len(result) == 1: + return (result[0][PARTY] if first_result[SCORE] > CUTOFF else None), True + + second_result = result[1] + + if first_result[SCORE] > CUTOFF: + # If multiple matches with the same score, return None but discontinue matching + # Matches were found but were too closes to distinguish between + if first_result[SCORE] == second_result[SCORE]: + return None, True + + return first_result[PARTY], True + else: + return None, False diff --git a/erpnext/accounts/doctype/bank_transaction/bank_transaction.json b/erpnext/accounts/doctype/bank_transaction/bank_transaction.json index d3dc5b5845..e7de71aef4 100644 --- a/erpnext/accounts/doctype/bank_transaction/bank_transaction.json +++ b/erpnext/accounts/doctype/bank_transaction/bank_transaction.json @@ -68,7 +68,7 @@ "fieldtype": "Select", "in_standard_filter": 1, "label": "Status", - "options": "\nPending\nSettled\nUnreconciled\nReconciled" + "options": "\nPending\nSettled\nUnreconciled\nReconciled\nCancelled" }, { "fieldname": "bank_account", @@ -238,7 +238,7 @@ ], "is_submittable": 1, "links": [], - "modified": "2023-04-04 15:47:20.620006", + "modified": "2023-05-17 14:56:10.547480", "modified_by": "Administrator", "module": "Accounts", "name": "Bank Transaction", diff --git a/erpnext/accounts/doctype/bank_transaction/test_auto_match_party.py b/erpnext/accounts/doctype/bank_transaction/test_auto_match_party.py index 2f94516ac2..8c6dc9d766 100644 --- a/erpnext/accounts/doctype/bank_transaction/test_auto_match_party.py +++ b/erpnext/accounts/doctype/bank_transaction/test_auto_match_party.py @@ -13,11 +13,13 @@ class TestAutoMatchParty(FrappeTestCase): def setUpClass(cls): create_bank_account() frappe.db.set_single_value("Accounts Settings", "enable_party_matching", 1) + frappe.db.set_single_value("Accounts Settings", "enable_fuzzy_matching", 1) return super().setUpClass() @classmethod def tearDownClass(cls): frappe.db.set_single_value("Accounts Settings", "enable_party_matching", 0) + frappe.db.set_single_value("Accounts Settings", "enable_fuzzy_matching", 0) def test_match_by_account_number(self): """Test if transaction matches with existing (Bank Party Mapper) or new match.""" @@ -145,6 +147,22 @@ class TestAutoMatchParty(FrappeTestCase): self.assertEqual(bank_party_mapper.bank_party_name, "Amazn Co.") self.assertEqual(doc_2.party, "Amazon") + def test_skip_match_if_multiple_close_results(self): + create_supplier_for_match(supplier_name="Adithya Medical & General Stores") + create_supplier_for_match(supplier_name="Adithya Medical And General Stores") + + doc = create_bank_transaction( + description="Paracetamol Consignment, SINV-0009", + withdrawal=24.85, + transaction_id="3a1da4ee2dc5a980138d56ef3460cbd9", + party_name="Adithya Medical & General", + ) + + # Mapping is skipped as both Supplier names have the same match score + self.assertEqual(doc.party_type, None) + self.assertEqual(doc.party, None) + self.assertFalse(doc.bank_party_mapper) + def create_supplier_for_match(supplier_name="John Doe & Co.", iban=None, account_no=None): if frappe.db.exists("Supplier", {"supplier_name": supplier_name}):