From 67e647232cf289858d1aa3dbea8fe94d5ba746d2 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Fri, 16 Apr 2021 21:44:49 +0530 Subject: [PATCH] ci(semgrep): Add semgrep testing (#24871) Adds semgrep testing in CI. Refer to: - https://github.com/frappe/frappe/pull/12524 - https://github.com/frappe/frappe/pull/12577 --- .github/helper/semgrep_rules/README.md | 38 +++++++++++ .../semgrep_rules/frappe_correctness.py | 28 +++++++++ .../semgrep_rules/frappe_correctness.yml | 56 +++++++++++++++++ .github/helper/semgrep_rules/security.py | 6 ++ .github/helper/semgrep_rules/security.yml | 25 ++++++++ .github/helper/semgrep_rules/translate.js | 37 +++++++++++ .github/helper/semgrep_rules/translate.py | 53 ++++++++++++++++ .github/helper/semgrep_rules/translate.yml | 63 +++++++++++++++++++ .github/helper/semgrep_rules/ux.py | 31 +++++++++ .github/helper/semgrep_rules/ux.yml | 15 +++++ .github/workflows/semgrep.yml | 24 +++++++ 11 files changed, 376 insertions(+) create mode 100644 .github/helper/semgrep_rules/README.md create mode 100644 .github/helper/semgrep_rules/frappe_correctness.py create mode 100644 .github/helper/semgrep_rules/frappe_correctness.yml create mode 100644 .github/helper/semgrep_rules/security.py create mode 100644 .github/helper/semgrep_rules/security.yml create mode 100644 .github/helper/semgrep_rules/translate.js create mode 100644 .github/helper/semgrep_rules/translate.py create mode 100644 .github/helper/semgrep_rules/translate.yml create mode 100644 .github/helper/semgrep_rules/ux.py create mode 100644 .github/helper/semgrep_rules/ux.yml create mode 100644 .github/workflows/semgrep.yml diff --git a/.github/helper/semgrep_rules/README.md b/.github/helper/semgrep_rules/README.md new file mode 100644 index 0000000000..670d8d280f --- /dev/null +++ b/.github/helper/semgrep_rules/README.md @@ -0,0 +1,38 @@ +# Semgrep linting + +## What is semgrep? +Semgrep or "semantic grep" is language agnostic static analysis tool. In simple terms semgrep is syntax-aware `grep`, so unlike regex it doesn't get confused by different ways of writing same thing or whitespaces or code split in multiple lines etc. + +Example: + +To check if a translate function is using f-string or not the regex would be `r"_\(\s*f[\"']"` while equivalent rule in semgrep would be `_(f"...")`. As semgrep knows grammer of language it takes care of unnecessary whitespace, type of quotation marks etc. + +You can read more such examples in `.github/helper/semgrep_rules` directory. + +# Why/when to use this? +We want to maintain quality of contributions, at the same time remembering all the good practices can be pain to deal with while evaluating contributions. Using semgrep if you can translate "best practice" into a rule then it can automate the task for us. + +## Running locally + +Install semgrep using homebrew `brew install semgrep` or pip `pip install semgrep`. + +To run locally use following command: + +`semgrep --config=.github/helper/semgrep_rules [file/folder names]` + +## Testing +semgrep allows testing the tests. Refer to this page: https://semgrep.dev/docs/writing-rules/testing-rules/ + +When writing new rules you should write few positive and few negative cases as shown in the guide and current tests. + +To run current tests: `semgrep --test --test-ignore-todo .github/helper/semgrep_rules` + + +## Reference + +If you are new to Semgrep read following pages to get started on writing/modifying rules: + +- https://semgrep.dev/docs/getting-started/ +- https://semgrep.dev/docs/writing-rules/rule-syntax +- https://semgrep.dev/docs/writing-rules/pattern-examples/ +- https://semgrep.dev/docs/writing-rules/rule-ideas/#common-use-cases diff --git a/.github/helper/semgrep_rules/frappe_correctness.py b/.github/helper/semgrep_rules/frappe_correctness.py new file mode 100644 index 0000000000..4798b927f8 --- /dev/null +++ b/.github/helper/semgrep_rules/frappe_correctness.py @@ -0,0 +1,28 @@ +import frappe +from frappe import _, flt + +from frappe.model.document import Document + + +def on_submit(self): + if self.value_of_goods == 0: + frappe.throw(_('Value of goods cannot be 0')) + # ruleid: frappe-modifying-after-submit + self.status = 'Submitted' + +def on_submit(self): + if flt(self.per_billed) < 100: + self.update_billing_status() + else: + # todook: frappe-modifying-after-submit + self.status = "Completed" + self.db_set("status", "Completed") + +class TestDoc(Document): + pass + + def validate(self): + #ruleid: frappe-modifying-child-tables-while-iterating + for item in self.child_table: + if item.value < 0: + self.remove(item) diff --git a/.github/helper/semgrep_rules/frappe_correctness.yml b/.github/helper/semgrep_rules/frappe_correctness.yml new file mode 100644 index 0000000000..394abbf74d --- /dev/null +++ b/.github/helper/semgrep_rules/frappe_correctness.yml @@ -0,0 +1,56 @@ +# This file specifies rules for correctness according to how frappe doctype data model works. + +rules: +- id: frappe-modifying-after-submit + patterns: + - pattern: self.$ATTR = ... + - pattern-inside: | + def on_submit(self, ...): + ... + message: | + Doctype modified after submission. Please check if modification of self.$ATTR is commited to database. + languages: [python] + severity: ERROR + +- id: frappe-print-function-in-doctypes + pattern: print(...) + message: | + Did you mean to leave this print statement in? Consider using msgprint or logger instead of print statement. + languages: [python] + severity: WARNING + paths: + exclude: + - test_*.py + include: + - "*/**/doctype/*" + +- id: frappe-modifying-child-tables-while-iterating + pattern-either: + - pattern: | + for $ROW in self.$TABLE: + ... + self.remove(...) + - pattern: | + for $ROW in self.$TABLE: + ... + self.append(...) + message: | + Child table being modified while iterating on it. + languages: [python] + severity: ERROR + paths: + include: + - "*/**/doctype/*" + +- id: frappe-same-key-assigned-twice + pattern-either: + - pattern: | + {..., $X: $A, ..., $X: $B, ...} + - pattern: | + dict(..., ($X, $A), ..., ($X, $B), ...) + - pattern: | + _dict(..., ($X, $A), ..., ($X, $B), ...) + message: | + key `$X` is uselessly assigned twice. This could be a potential bug. + languages: [python] + severity: ERROR diff --git a/.github/helper/semgrep_rules/security.py b/.github/helper/semgrep_rules/security.py new file mode 100644 index 0000000000..f477d7c176 --- /dev/null +++ b/.github/helper/semgrep_rules/security.py @@ -0,0 +1,6 @@ +def function_name(input): + # ruleid: frappe-codeinjection-eval + eval(input) + +# ok: frappe-codeinjection-eval +eval("1 + 1") diff --git a/.github/helper/semgrep_rules/security.yml b/.github/helper/semgrep_rules/security.yml new file mode 100644 index 0000000000..5a5098bf50 --- /dev/null +++ b/.github/helper/semgrep_rules/security.yml @@ -0,0 +1,25 @@ +rules: +- id: frappe-codeinjection-eval + patterns: + - pattern-not: eval("...") + - pattern: eval(...) + message: | + Detected the use of eval(). eval() can be dangerous if used to evaluate + dynamic content. Avoid it or use safe_eval(). + languages: [python] + severity: ERROR + +- id: frappe-sqli-format-strings + patterns: + - pattern-inside: | + @frappe.whitelist() + def $FUNC(...): + ... + - pattern-either: + - pattern: frappe.db.sql("..." % ...) + - pattern: frappe.db.sql(f"...", ...) + - pattern: frappe.db.sql("...".format(...), ...) + message: | + Detected use of raw string formatting for SQL queries. This can lead to sql injection vulnerabilities. Refer security guidelines - https://github.com/frappe/erpnext/wiki/Code-Security-Guidelines + languages: [python] + severity: WARNING diff --git a/.github/helper/semgrep_rules/translate.js b/.github/helper/semgrep_rules/translate.js new file mode 100644 index 0000000000..7b92fe2dff --- /dev/null +++ b/.github/helper/semgrep_rules/translate.js @@ -0,0 +1,37 @@ +// ruleid: frappe-translation-empty-string +__("") +// ruleid: frappe-translation-empty-string +__('') + +// ok: frappe-translation-js-formatting +__('Welcome {0}, get started with ERPNext in just a few clicks.', [full_name]); + +// ruleid: frappe-translation-js-formatting +__(`Welcome ${full_name}, get started with ERPNext in just a few clicks.`); + +// ok: frappe-translation-js-formatting +__('This is fine'); + + +// ok: frappe-translation-trailing-spaces +__('This is fine'); + +// ruleid: frappe-translation-trailing-spaces +__(' this is not ok '); +// ruleid: frappe-translation-trailing-spaces +__('this is not ok '); +// ruleid: frappe-translation-trailing-spaces +__(' this is not ok'); + +// ok: frappe-translation-js-splitting +__('You have {0} subscribers in your mailing list.', [subscribers.length]) + +// todoruleid: frappe-translation-js-splitting +__('You have') + subscribers.length + __('subscribers in your mailing list.') + +// ruleid: frappe-translation-js-splitting +__('You have' + 'subscribers in your mailing list.') + +// ruleid: frappe-translation-js-splitting +__('You have {0} subscribers' + + 'in your mailing list', [subscribers.length]) diff --git a/.github/helper/semgrep_rules/translate.py b/.github/helper/semgrep_rules/translate.py new file mode 100644 index 0000000000..bd6cd9126c --- /dev/null +++ b/.github/helper/semgrep_rules/translate.py @@ -0,0 +1,53 @@ +# Examples taken from https://frappeframework.com/docs/user/en/translations +# This file is used for testing the tests. + +from frappe import _ + +full_name = "Jon Doe" +# ok: frappe-translation-python-formatting +_('Welcome {0}, get started with ERPNext in just a few clicks.').format(full_name) + +# ruleid: frappe-translation-python-formatting +_('Welcome %s, get started with ERPNext in just a few clicks.' % full_name) +# ruleid: frappe-translation-python-formatting +_('Welcome %(name)s, get started with ERPNext in just a few clicks.' % {'name': full_name}) + +# ruleid: frappe-translation-python-formatting +_('Welcome {0}, get started with ERPNext in just a few clicks.'.format(full_name)) + + +subscribers = ["Jon", "Doe"] +# ok: frappe-translation-python-formatting +_('You have {0} subscribers in your mailing list.').format(len(subscribers)) + +# ruleid: frappe-translation-python-splitting +_('You have') + len(subscribers) + _('subscribers in your mailing list.') + +# ruleid: frappe-translation-python-splitting +_('You have {0} subscribers \ + in your mailing list').format(len(subscribers)) + +# ok: frappe-translation-python-splitting +_('You have {0} subscribers') \ + + 'in your mailing list' + +# ruleid: frappe-translation-trailing-spaces +msg = _(" You have {0} pending invoice ") +# ruleid: frappe-translation-trailing-spaces +msg = _("You have {0} pending invoice ") +# ruleid: frappe-translation-trailing-spaces +msg = _(" You have {0} pending invoice") + +# ok: frappe-translation-trailing-spaces +msg = ' ' + _("You have {0} pending invoices") + ' ' + +# ruleid: frappe-translation-python-formatting +_(f"can not format like this - {subscribers}") +# ruleid: frappe-translation-python-splitting +_(f"what" + f"this is also not cool") + + +# ruleid: frappe-translation-empty-string +_("") +# ruleid: frappe-translation-empty-string +_('') diff --git a/.github/helper/semgrep_rules/translate.yml b/.github/helper/semgrep_rules/translate.yml new file mode 100644 index 0000000000..3737da5a7e --- /dev/null +++ b/.github/helper/semgrep_rules/translate.yml @@ -0,0 +1,63 @@ +rules: +- id: frappe-translation-empty-string + pattern-either: + - pattern: _("") + - pattern: __("") + message: | + Empty string is useless for translation. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python, javascript, json] + severity: ERROR + +- id: frappe-translation-trailing-spaces + pattern-either: + - pattern: _("=~/(^[ \t]+|[ \t]+$)/") + - pattern: __("=~/(^[ \t]+|[ \t]+$)/") + message: | + Trailing or leading whitespace not allowed in translate strings. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python, javascript, json] + severity: ERROR + +- id: frappe-translation-python-formatting + pattern-either: + - pattern: _("..." % ...) + - pattern: _("...".format(...)) + - pattern: _(f"...") + message: | + Only positional formatters are allowed and formatting should not be done before translating. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python] + severity: ERROR + +- id: frappe-translation-js-formatting + patterns: + - pattern: __(`...`) + - pattern-not: __("...") + message: | + Template strings are not allowed for text formatting. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [javascript, json] + severity: ERROR + +- id: frappe-translation-python-splitting + pattern-either: + - pattern: _(...) + ... + _(...) + - pattern: _("..." + "...") + - pattern-regex: '_\([^\)]*\\\s*' + message: | + Do not split strings inside translate function. Do not concatenate using translate functions. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python] + severity: ERROR + +- id: frappe-translation-js-splitting + pattern-either: + - pattern-regex: '__\([^\)]*[\+\\]\s*' + - pattern: __('...' + '...') + - pattern: __('...') + __('...') + message: | + Do not split strings inside translate function. Do not concatenate using translate functions. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [javascript, json] + severity: ERROR diff --git a/.github/helper/semgrep_rules/ux.py b/.github/helper/semgrep_rules/ux.py new file mode 100644 index 0000000000..4a74457435 --- /dev/null +++ b/.github/helper/semgrep_rules/ux.py @@ -0,0 +1,31 @@ +import frappe +from frappe import msgprint, throw, _ + + +# ruleid: frappe-missing-translate-function +throw("Error Occured") + +# ruleid: frappe-missing-translate-function +frappe.throw("Error Occured") + +# ruleid: frappe-missing-translate-function +frappe.msgprint("Useful message") + +# ruleid: frappe-missing-translate-function +msgprint("Useful message") + + +# ok: frappe-missing-translate-function +translatedmessage = _("Hello") + +# ok: frappe-missing-translate-function +throw(translatedmessage) + +# ok: frappe-missing-translate-function +msgprint(translatedmessage) + +# ok: frappe-missing-translate-function +msgprint(_("Helpful message")) + +# ok: frappe-missing-translate-function +frappe.throw(_("Error occured")) diff --git a/.github/helper/semgrep_rules/ux.yml b/.github/helper/semgrep_rules/ux.yml new file mode 100644 index 0000000000..ed06a6a80c --- /dev/null +++ b/.github/helper/semgrep_rules/ux.yml @@ -0,0 +1,15 @@ +rules: +- id: frappe-missing-translate-function + pattern-either: + - patterns: + - pattern: frappe.msgprint("...", ...) + - pattern-not: frappe.msgprint(_("..."), ...) + - pattern-not: frappe.msgprint(__("..."), ...) + - patterns: + - pattern: frappe.throw("...", ...) + - pattern-not: frappe.throw(_("..."), ...) + - pattern-not: frappe.throw(__("..."), ...) + message: | + All user facing text must be wrapped in translate function. Please refer to translation documentation. https://frappeframework.com/docs/user/en/guides/basics/translations + languages: [python, javascript, json] + severity: ERROR diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml new file mode 100644 index 0000000000..df08263236 --- /dev/null +++ b/.github/workflows/semgrep.yml @@ -0,0 +1,24 @@ +name: Semgrep + +on: + pull_request: + branches: + - develop +jobs: + semgrep: + name: Frappe Linter + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup python3 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Run semgrep + run: | + python -m pip install -q semgrep + git fetch origin $GITHUB_BASE_REF:$GITHUB_BASE_REF -q + files=$(git diff --name-only --diff-filter=d $GITHUB_BASE_REF) + [[ -d .github/helper/semgrep_rules ]] && semgrep --severity ERROR --config=.github/helper/semgrep_rules --quiet --error $files + semgrep --config="r/python.lang.correctness" --quiet --error $files + [[ -d .github/helper/semgrep_rules ]] && semgrep --severity WARNING --severity INFO --config=.github/helper/semgrep_rules --quiet $files