feat: Optional Fuzzy Matching & Skip Matches for multiple similar matches

- Fuzzy matching can be enabled optionally in the settings
- If a query gets multiple matches with the same score, do not set a party as it is an extremely close call
- misc: Add 'cancelled' status to Bank transaction
- Test for skipping matching with extremely close matches
This commit is contained in:
marination 2023-05-17 19:45:03 +05:30
parent 4a14e9ea4e
commit 4364fb9628
4 changed files with 88 additions and 24 deletions

View File

@ -64,7 +64,8 @@
"tab_break_dpet",
"show_balance_in_coa",
"banking_tab",
"enable_party_matching"
"enable_party_matching",
"enable_fuzzy_matching"
],
"fields": [
{
@ -404,6 +405,14 @@
"fieldname": "enable_party_matching",
"fieldtype": "Check",
"label": "Enable Automatic Party Matching"
},
{
"default": "0",
"depends_on": "enable_party_matching",
"description": "Approximately match the description/party name against parties",
"fieldname": "enable_fuzzy_matching",
"fieldtype": "Check",
"label": "Enable Fuzzy Matching"
}
],
"icon": "icon-cog",
@ -411,7 +420,7 @@
"index_web_pages_for_search": 1,
"issingle": 1,
"links": [],
"modified": "2023-04-21 13:11:37.130743",
"modified": "2023-05-17 12:20:04.107641",
"modified_by": "Administrator",
"module": "Accounts",
"name": "Accounts Settings",

View File

@ -35,13 +35,15 @@ class AutoMatchParty:
return self.__dict__.get(key, None)
def match(self) -> Union[Tuple, None]:
result = None
result = AutoMatchbyAccountIBAN(
bank_party_account_number=self.bank_party_account_number,
bank_party_iban=self.bank_party_iban,
deposit=self.deposit,
).match()
if not result:
fuzzy_matching_enabled = frappe.db.get_single_value("Accounts Settings", "enable_fuzzy_matching")
if not result and fuzzy_matching_enabled:
result = AutoMatchbyPartyDescription(
bank_party_name=self.bank_party_name, description=self.description, deposit=self.deposit
).match()
@ -184,31 +186,66 @@ class AutoMatchbyPartyDescription:
for party in parties:
name_field = party.lower() + "_name"
filters = {"status": "Active"} if party == "Employee" else {"disabled": 0}
names = frappe.get_all(party, filters=filters, pluck=name_field)
for field in ["bank_party_name", "description"]:
if not result and self.get(field):
result = self.fuzzy_search_and_return_result(party, names, field)
if result:
if not self.get(field):
continue
result, skip = self.fuzzy_search_and_return_result(party, names, field)
if result or skip:
break
if result or skip:
# We skip if:
# If it was hard to distinguish between close matches and so match is None
# OR if the right match was found
break
return result
def fuzzy_search_and_return_result(self, party, names, field) -> Union[Tuple, None]:
result = process.extractOne(query=self.get(field), choices=names, scorer=fuzz.token_set_ratio)
skip = False
result = process.extract(query=self.get(field), choices=names, scorer=fuzz.token_set_ratio)
party_name, skip = self.process_fuzzy_result(result)
if not party_name:
return None, skip
if result:
party_name, score, index = result
if score > 75:
# Dont set description as a key in Bank Party Mapper due to its volatility
mapper = {"bank_party_name": self.get(field)} if field == "bank_party_name" else None
return (
party,
party_name,
mapper,
)
else:
return None
), skip
return result
def process_fuzzy_result(self, result: Union[list, None]):
"""
If there are multiple valid close matches return None as result may be faulty.
Return the result only if one accurate match stands out.
Returns: Result, Skip (whether or not to continue matching)
"""
PARTY, SCORE, CUTOFF = 0, 1, 80
if not result or not len(result):
return None, False
first_result = result[0]
if len(result) == 1:
return (result[0][PARTY] if first_result[SCORE] > CUTOFF else None), True
second_result = result[1]
if first_result[SCORE] > CUTOFF:
# If multiple matches with the same score, return None but discontinue matching
# Matches were found but were too closes to distinguish between
if first_result[SCORE] == second_result[SCORE]:
return None, True
return first_result[PARTY], True
else:
return None, False

View File

@ -68,7 +68,7 @@
"fieldtype": "Select",
"in_standard_filter": 1,
"label": "Status",
"options": "\nPending\nSettled\nUnreconciled\nReconciled"
"options": "\nPending\nSettled\nUnreconciled\nReconciled\nCancelled"
},
{
"fieldname": "bank_account",
@ -238,7 +238,7 @@
],
"is_submittable": 1,
"links": [],
"modified": "2023-04-04 15:47:20.620006",
"modified": "2023-05-17 14:56:10.547480",
"modified_by": "Administrator",
"module": "Accounts",
"name": "Bank Transaction",

View File

@ -13,11 +13,13 @@ class TestAutoMatchParty(FrappeTestCase):
def setUpClass(cls):
create_bank_account()
frappe.db.set_single_value("Accounts Settings", "enable_party_matching", 1)
frappe.db.set_single_value("Accounts Settings", "enable_fuzzy_matching", 1)
return super().setUpClass()
@classmethod
def tearDownClass(cls):
frappe.db.set_single_value("Accounts Settings", "enable_party_matching", 0)
frappe.db.set_single_value("Accounts Settings", "enable_fuzzy_matching", 0)
def test_match_by_account_number(self):
"""Test if transaction matches with existing (Bank Party Mapper) or new match."""
@ -145,6 +147,22 @@ class TestAutoMatchParty(FrappeTestCase):
self.assertEqual(bank_party_mapper.bank_party_name, "Amazn Co.")
self.assertEqual(doc_2.party, "Amazon")
def test_skip_match_if_multiple_close_results(self):
create_supplier_for_match(supplier_name="Adithya Medical & General Stores")
create_supplier_for_match(supplier_name="Adithya Medical And General Stores")
doc = create_bank_transaction(
description="Paracetamol Consignment, SINV-0009",
withdrawal=24.85,
transaction_id="3a1da4ee2dc5a980138d56ef3460cbd9",
party_name="Adithya Medical & General",
)
# Mapping is skipped as both Supplier names have the same match score
self.assertEqual(doc.party_type, None)
self.assertEqual(doc.party, None)
self.assertFalse(doc.bank_party_mapper)
def create_supplier_for_match(supplier_name="John Doe & Co.", iban=None, account_no=None):
if frappe.db.exists("Supplier", {"supplier_name": supplier_name}):