perf: reduce memory usage while migrating remarks

Page through records using primary key
This commit is contained in:
ruthra kumar 2023-01-21 13:28:22 +05:30
parent adaeba1554
commit 9bb64107c5

View File

@ -1,81 +1,98 @@
import frappe
from frappe import qb
from frappe.utils import create_batch
def remove_duplicate_entries(pl_entries):
unique_vouchers = set()
for x in pl_entries:
unique_vouchers.add(
(x.company, x.account, x.party_type, x.party, x.voucher_type, x.voucher_no, x.gle_remarks)
)
entries = []
for x in unique_vouchers:
entries.append(
frappe._dict(
company=x[0],
account=x[1],
party_type=x[2],
party=x[3],
voucher_type=x[4],
voucher_no=x[5],
gle_remarks=x[6],
)
)
return entries
from frappe.query_builder import CustomFunction
from frappe.query_builder.functions import Count, IfNull
from frappe.utils import flt
def execute():
"""
Migrate 'remarks' field from 'tabGL Entry' to 'tabPayment Ledger Entry'
"""
if frappe.reload_doc("accounts", "doctype", "payment_ledger_entry"):
gle = qb.DocType("GL Entry")
ple = qb.DocType("Payment Ledger Entry")
# get ple and their remarks from GL Entry
pl_entries = (
qb.from_(ple)
.left_join(gle)
.on(
(ple.account == gle.account)
& (ple.party_type == gle.party_type)
& (ple.party == gle.party)
& (ple.voucher_type == gle.voucher_type)
& (ple.voucher_no == gle.voucher_no)
& (ple.company == gle.company)
)
.select(
ple.company,
ple.account,
ple.party_type,
ple.party,
ple.voucher_type,
ple.voucher_no,
gle.remarks.as_("gle_remarks"),
)
.where((ple.delinked == 0) & (gle.is_cancelled == 0))
.run(as_dict=True)
)
# Get empty PLE records
un_processed = (
qb.from_(ple).select(Count(ple.name)).where((ple.remarks.isnull()) & (ple.delinked == 0)).run()
)[0][0]
pl_entries = remove_duplicate_entries(pl_entries)
if un_processed:
print(f"Remarks for {un_processed} Payment Ledger records will be updated from GL Entry")
if pl_entries:
# split into multiple batches, update and commit for each batch
ifelse = CustomFunction("IF", ["condition", "then", "else"])
processed = 0
last_percent_update = 0
batch_size = 1000
for batch in create_batch(pl_entries, batch_size):
for entry in batch:
query = (
qb.update(ple)
.set(ple.remarks, entry.gle_remarks)
.where(
(ple.company == entry.company)
& (ple.account == entry.account)
& (ple.party_type == entry.party_type)
& (ple.party == entry.party)
& (ple.voucher_type == entry.voucher_type)
& (ple.voucher_no == entry.voucher_no)
)
)
query.run()
last_name = None
frappe.db.commit()
while True:
if last_name:
where_clause = (ple.name.gt(last_name)) & (ple.remarks.isnull()) & (ple.delinked == 0)
else:
where_clause = (ple.remarks.isnull()) & (ple.delinked == 0)
# results are deterministic
names = (
qb.from_(ple).select(ple.name).where(where_clause).orderby(ple.name).limit(batch_size).run()
)
if names:
last_name = names[-1][0]
pl_entries = (
qb.from_(ple)
.left_join(gle)
.on(
(ple.account == gle.account)
& (ple.party_type == gle.party_type)
& (ple.party == gle.party)
& (ple.voucher_type == gle.voucher_type)
& (ple.voucher_no == gle.voucher_no)
& (
ple.against_voucher_type
== IfNull(
ifelse(gle.against_voucher_type == "", None, gle.against_voucher_type), gle.voucher_type
)
)
& (
ple.against_voucher_no
== IfNull(ifelse(gle.against_voucher == "", None, gle.against_voucher), gle.voucher_no)
)
& (ple.company == gle.company)
& (
((ple.account_type == "Receivable") & (ple.amount == (gle.debit - gle.credit)))
| (ple.account_type == "Payable") & (ple.amount == (gle.credit - gle.debit))
)
& (gle.remarks.notnull())
& (gle.is_cancelled == 0)
)
.select(ple.name)
.distinct()
.select(
gle.remarks.as_("gle_remarks"),
)
.where(ple.name.isin(names))
.run(as_dict=True)
)
if pl_entries:
for entry in pl_entries:
query = qb.update(ple).set(ple.remarks, entry.gle_remarks).where((ple.name == entry.name))
query.run()
frappe.db.commit()
processed += len(pl_entries)
percentage = flt((processed / un_processed) * 100, 2)
if percentage - last_percent_update > 1:
print(f"{percentage}% ({processed}) PLE records updated")
last_percent_update = percentage
else:
break
print("Remarks succesfully migrated")