Run python linter and formatter in CI

Helps find some errors and cosmetic problems.
2025-05-24 06:53:59 +08:00 · 2023-01-27 18:46:09 +04:00 · 2023-01-27 18:46:09 +04:00 · 21a3f8206c
commit 21a3f8206c
parent 334864127d
5 changed files with 222 additions and 120 deletions
--- a/.github/gh_matrix_builder.py
+++ b/.github/gh_matrix_builder.py
@ -136,6 +136,7 @@ def macos_config(overrides):
    base_config.update(overrides)
    return base_config

+
 # common installcheck_args for all pg15 tests
 # dist_move_chunk is skipped due to #4972
 # partialize_finalize is ignored due to #4937
@ -147,12 +148,22 @@ m["include"].append(
    build_debug_config({"pg": PG13_LATEST, "cc": "clang-14", "cxx": "clang++-14"})
 )
 m["include"].append(build_debug_config({"pg": PG14_LATEST}))
-m["include"].append(build_debug_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}))
+m["include"].append(
+    build_debug_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args})
+)

 # test latest postgres release in MacOS
-m["include"].append(build_release_config(macos_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args})))
+m["include"].append(
+    build_release_config(
+        macos_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args})
+    )
+)
 # test latest postgres release without telemetry
-m["include"].append(build_without_telemetry({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}))
+m["include"].append(
+    build_without_telemetry(
+        {"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}
+    )
+)

 # if this is not a pull request e.g. a scheduled run or a push
 # to a specific branch like prerelease_test we add additional
@ -193,7 +204,11 @@ if event_type != "pull_request":
    )

    # add debug test for first supported PG15 version
-    m["include"].append(build_debug_config({"pg": PG15_EARLIEST, "installcheck_args": pg15_installcheck_args}))
+    m["include"].append(
+        build_debug_config(
+            {"pg": PG15_EARLIEST, "installcheck_args": pg15_installcheck_args}
+        )
+    )

    # add debug test for MacOS
    m["include"].append(build_debug_config(macos_config({})))
@ -202,7 +217,11 @@ if event_type != "pull_request":
    m["include"].append(build_release_config({"pg": PG12_LATEST}))
    m["include"].append(build_release_config({"pg": PG13_LATEST}))
    m["include"].append(build_release_config({"pg": PG14_LATEST}))
-    m["include"].append(build_release_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}))
+    m["include"].append(
+        build_release_config(
+            {"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}
+        )
+    )

    # add apache only test for latest pg
    m["include"].append(build_apache_config({"pg": PG12_LATEST}))
@ -223,7 +242,15 @@ if event_type != "pull_request":
            }
        )
    )
-    m["include"].append(build_debug_config({"pg": 15, "snapshot": "snapshot", "installcheck_args": pg15_installcheck_args}))
+    m["include"].append(
+        build_debug_config(
+            {
+                "pg": 15,
+                "snapshot": "snapshot",
+                "installcheck_args": pg15_installcheck_args,
+            }
+        )
+    )
 else:
    # Check if we need to check for the flaky tests. Determine which test files
    # have been changed in the PR. The sql files might include other files that
@ -267,7 +294,9 @@ else:
            tests.add(name)
        else:
            # Should've been filtered out above.
-            print(f"unknown extension '{ext}' for test output file '{f}'", file=sys.stderr)
+            print(
+                f"unknown extension '{ext}' for test output file '{f}'", file=sys.stderr
+            )
            sys.exit(1)

    if tests:
--- a/.github/workflows/code_style.yaml
+++ b/.github/workflows/code_style.yaml
@ -1,5 +1,5 @@
 name: Code style
-on:
+"on":
  push:
    branches:
      - main
@ -63,6 +63,20 @@ jobs:
        ./scripts/clang_format_all.sh
        git diff --exit-code

+  python_checks:
+    name: Check Python code in tree
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install prerequisites
+        run: |
+          pip install black prospector psutil
+      - name: Checkout source
+        uses: actions/checkout@v3
+      - name: Run prospector
+        run: |
+          find . -type f -name "*.py" -print -exec prospector {} + -exec black {} +
+          git diff --exit-code
+
  misc_checks:
    name: Check license, update scripts, and git hooks
    runs-on: ubuntu-22.04
--- a/scripts/githooks/commit_msg.py
+++ b/scripts/githooks/commit_msg.py
@ -4,28 +4,33 @@
 # https://chris.beams.io/posts/git-commit/
 import sys

+
 class GitCommitMessage:
-    'Represents a parsed Git commit message'
+    "Represents a parsed Git commit message"

-    rules = [ 'Separate subject from body with a blank line',
-              'Limit the subject line to 50 characters',
-              'Capitalize the subject line',
-              'Do not end the subject line with a period',
-              'Use the imperative mood in the subject line',
-              'Wrap the body at 72 characters',
-              'Use the body to explain what and why vs. how' ]
+    rules = [
+        "Separate subject from body with a blank line",
+        "Limit the subject line to 50 characters",
+        "Capitalize the subject line",
+        "Do not end the subject line with a period",
+        "Use the imperative mood in the subject line",
+        "Wrap the body at 72 characters",
+        "Use the body to explain what and why vs. how",
+    ]

-    valid_rules = [ False, False, False, False, False, False, False ]
+    valid_rules = [False, False, False, False, False, False, False]

-    def __init__(self, filename = None):
+    def __init__(self, filename=None):
        lines = []

-        if filename != None:
-            with open(filename, 'r') as f:
+        if filename is not None:
+            with open(filename, "r", encoding="utf-8") as f:
                for line in f:
-                    if line.startswith('# ------------------------ >8 ------------------------'):
+                    if line.startswith(
+                        "# ------------------------ >8 ------------------------"
+                    ):
                        break
-                    if not line.startswith('#'):
+                    if not line.startswith("#"):
                        lines.append(line)

        self.parse_lines(lines)
@ -42,7 +47,7 @@ class GitCommitMessage:
        self.has_subject_body_separator = False

        if len(lines) > 1:
-            self.has_subject_body_separator = (len(lines[1].strip()) == 0)
+            self.has_subject_body_separator = len(lines[1].strip()) == 0

            if self.has_subject_body_separator:
                self.body_lines = lines[2:]
@ -52,54 +57,58 @@ class GitCommitMessage:
        return self

    def check_subject_body_separtor(self):
-        'Rule 1: Separate subject from body with a blank line'
+        "Rule 1: Separate subject from body with a blank line"

        if len(self.body_lines) > 0:
            return self.has_subject_body_separator
        return True

    def check_subject_limit(self):
-        'Rule 2: Limit the subject line to 50 characters'
+        "Rule 2: Limit the subject line to 50 characters"
        return len(self.subject.rstrip("\n")) <= 50

    def check_subject_capitalized(self):
-        'Rule 3: Capitalize the subject line'
+        "Rule 3: Capitalize the subject line"
        return len(self.subject) > 0 and self.subject[0].isupper()

    def check_subject_no_period(self):
-        'Rule 4: Do not end the subject line with a period'
-        return not self.subject.endswith('.')
+        "Rule 4: Do not end the subject line with a period"
+        return not self.subject.endswith(".")

-    common_first_words = [ 'Add',
-                           'Adjust',
-                           'Support',
-                           'Change',
-                           'Remove',
-                           'Fix',
-                           'Print',
-                           'Track',
-                           'Refactor',
-                           'Combine',
-                           'Release',
-                           'Set',
-                           'Stop',
-                           'Make',
-                           'Mark',
-                           'Enable',
-                           'Check',
-                           'Exclude',
-                           'Format',
-                           'Correct']
+    common_first_words = [
+        "Add",
+        "Adjust",
+        "Support",
+        "Change",
+        "Remove",
+        "Fix",
+        "Print",
+        "Track",
+        "Refactor",
+        "Combine",
+        "Release",
+        "Set",
+        "Stop",
+        "Make",
+        "Mark",
+        "Enable",
+        "Check",
+        "Exclude",
+        "Format",
+        "Correct",
+    ]

    def check_subject_imperative(self):
-        'Rule 5: Use the imperative mood in the subject line.'
-        'We can only check for common mistakes here, like using'
-        'the -ing form of a verb or non-imperative version of '
-        'common verbs'
+        """Rule 5: Use the imperative mood in the subject line.
+
+        We can only check for common mistakes here, like using
+        the -ing form of a verb or non-imperative version of
+        common verbs
+        """

        firstword = self.subject_words[0]

-        if firstword.endswith('ing'):
+        if firstword.endswith("ing"):
            return False

        for word in self.common_first_words:
@ -109,7 +118,7 @@ class GitCommitMessage:
        return True

    def check_body_limit(self):
-        'Rule 6: Wrap the body at 72 characters'
+        "Rule 6: Wrap the body at 72 characters"

        if len(self.body_lines) == 0:
            return True
@ -121,7 +130,7 @@ class GitCommitMessage:
        return True

    def check_body_uses_why(self):
-        'Rule 7: Use the body to explain what and why vs. how'
+        "Rule 7: Use the body to explain what and why vs. how"
        # Not enforcable
        return True

@ -132,15 +141,15 @@ class GitCommitMessage:
        check_subject_no_period,
        check_subject_imperative,
        check_body_limit,
-        check_body_uses_why ]
+        check_body_uses_why,
+    ]

    def check_the_seven_rules(self):
-        'validates the commit message against the seven rules'
+        "validates the commit message against the seven rules"

        num_violations = 0

-        for i in range(len(self.rule_funcs)):
-            func = self.rule_funcs[i]
+        for i, func in enumerate(self.rule_funcs):
            res = func(self)
            self.valid_rules[i] = res

@ -151,22 +160,26 @@ class GitCommitMessage:
            print()
            print("**** WARNING ****")
            print()
-            print("The commit message does not seem to comply with the project's guidelines.")
-            print("Please try to follow the \"Seven rules of a great commit message\":")
+            print(
+                "The commit message does not seem to comply with the project's guidelines."
+            )
+            print('Please try to follow the "Seven rules of a great commit message":')
            print("https://chris.beams.io/posts/git-commit/")
            print()
            print("The following rules are violated:\n")

            for i in range(len(self.rule_funcs)):
                if not self.valid_rules[i]:
-                    print(f"\t* Rule {i+1}: \"{self.rules[i]}\"")
+                    print(f'\t* Rule {i+1}: "{self.rules[i]}"')

        # Extra sanity checks beyond the seven rules
        if len(self.body_lines) == 0:
            print()
            print("NOTE: the commit message has no body.")
            print("It is recommended to add a body with a description of your")
-            print("changes, even if they are small. Explain what and why instead of how:")
+            print(
+                "changes, even if they are small. Explain what and why instead of how:"
+            )
            print("https://chris.beams.io/posts/git-commit/#why-not-how")

        if len(self.subject_words) < 3:
@ -182,18 +195,20 @@ class GitCommitMessage:

        return num_violations

+
 def main():
    if len(sys.argv) != 2:
        print("Unexpected number of arguments")
-        exit(1)
+        sys.exit(1)

    msg = GitCommitMessage(sys.argv[1])
    return msg.check_the_seven_rules()

+
 if __name__ == "__main__":
    main()
    # Always exit with success. We could also fail the commit if with
    # a non-zero exit code, but that might be a bit excessive and we'd
    # have to save the failed commit message to a file so that it can
    # be recovered.
-    exit(0)
+    sys.exit(0)
--- a/scripts/githooks/commit_msg_tests.py
+++ b/scripts/githooks/commit_msg_tests.py
@ -3,8 +3,8 @@
 import unittest
 from commit_msg import GitCommitMessage

-class TestCommitMsg(unittest.TestCase):

+class TestCommitMsg(unittest.TestCase):
    def testNoInput(self):
        m = GitCommitMessage().parse_lines([])
        self.assertEqual(len(m.body_lines), 0)
@ -12,64 +12,91 @@ class TestCommitMsg(unittest.TestCase):
        self.assertEqual(len(m.body_lines), 0)

    def testParsing(self):
-        m = GitCommitMessage().parse_lines(['This is a subject line', '   ', 'body line 1', 'body line 2'])
-        self.assertEqual(m.subject, 'This is a subject line')
+        m = GitCommitMessage().parse_lines(
+            ["This is a subject line", "   ", "body line 1", "body line 2"]
+        )
+        self.assertEqual(m.subject, "This is a subject line")
        self.assertTrue(m.has_subject_body_separator)
-        self.assertEqual(m.body_lines[0], 'body line 1')
-        self.assertEqual(m.body_lines[1], 'body line 2')
+        self.assertEqual(m.body_lines[0], "body line 1")
+        self.assertEqual(m.body_lines[1], "body line 2")

    def testNonImperative(self):
-        m = GitCommitMessage().parse_lines(['Adds new file'])
+        m = GitCommitMessage().parse_lines(["Adds new file"])
        self.assertFalse(m.check_subject_imperative())

-        m.parse_lines(['Adding new file'])
+        m.parse_lines(["Adding new file"])
        self.assertFalse(m.check_subject_imperative())

        # Default to accept
-        m.parse_lines(['Foo bar'])
+        m.parse_lines(["Foo bar"])
        self.assertTrue(m.check_subject_imperative())

    def testSubjectBodySeparator(self):
-        m = GitCommitMessage().parse_lines(['This is a subject line'])
+        m = GitCommitMessage().parse_lines(["This is a subject line"])
        self.assertTrue(m.check_subject_body_separtor())
-        m = GitCommitMessage().parse_lines(['This is a subject line', 'body'])
+        m = GitCommitMessage().parse_lines(["This is a subject line", "body"])
        self.assertFalse(m.check_subject_body_separtor())
-        m = GitCommitMessage().parse_lines(['This is a subject line', '', 'body'])
+        m = GitCommitMessage().parse_lines(["This is a subject line", "", "body"])
        self.assertTrue(m.check_subject_body_separtor())
-        m = GitCommitMessage().parse_lines(['This is a subject line', '   ', 'body'])
+        m = GitCommitMessage().parse_lines(["This is a subject line", "   ", "body"])
        self.assertTrue(m.check_subject_body_separtor())

    def testSubjectLimit(self):
-        m = GitCommitMessage().parse_lines(['This subject line is exactly 48 characters long'])
+        m = GitCommitMessage().parse_lines(
+            ["This subject line is exactly 48 characters long"]
+        )
        self.assertTrue(m.check_subject_limit())
-        m = GitCommitMessage().parse_lines(['This is a very long subject line that will obviously exceed the limit'])
+        m = GitCommitMessage().parse_lines(
+            ["This is a very long subject line that will obviously exceed the limit"]
+        )
        self.assertFalse(m.check_subject_limit())
-        m = GitCommitMessage().parse_lines(['This 50-character subject line ends with an LF EOL\n'])
+        m = GitCommitMessage().parse_lines(
+            ["This 50-character subject line ends with an LF EOL\n"]
+        )
        self.assertTrue(m.check_subject_limit())

    def testSubjectCapitalized(self):
-        m = GitCommitMessage().parse_lines(['This subject line is capitalized'])
+        m = GitCommitMessage().parse_lines(["This subject line is capitalized"])
        self.assertTrue(m.check_subject_capitalized())
-        m = GitCommitMessage().parse_lines(['this subject line is not capitalized'])
+        m = GitCommitMessage().parse_lines(["this subject line is not capitalized"])
        self.assertFalse(m.check_subject_capitalized())

    def testSubjectNoPeriod(self):
-        m = GitCommitMessage().parse_lines(['This subject line ends with a period.'])
+        m = GitCommitMessage().parse_lines(["This subject line ends with a period."])
        self.assertFalse(m.check_subject_no_period())
-        m = GitCommitMessage().parse_lines(['This subject line does not end with a period'])
+        m = GitCommitMessage().parse_lines(
+            ["This subject line does not end with a period"]
+        )
        self.assertTrue(m.check_subject_no_period())

    def testBodyLimit(self):
-        m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'A short body line'])
+        m = GitCommitMessage().parse_lines(
+            ["This is a subject line", " ", "A short body line"]
+        )
        self.assertTrue(m.check_body_limit())
-        m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'A very long body line which certainly exceeds the 72 char recommended limit'])
+        m = GitCommitMessage().parse_lines(
+            [
+                "This is a subject line",
+                " ",
+                "A very long body line which certainly exceeds the 72 char recommended limit",
+            ]
+        )
        self.assertFalse(m.check_body_limit())
-        m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'A body line with exactly 72 characters, followed by an EOL (Unix-style).\n'])
+        m = GitCommitMessage().parse_lines(
+            [
+                "This is a subject line",
+                " ",
+                "A body line with exactly 72 characters, followed by an EOL (Unix-style).\n",
+            ]
+        )
        self.assertTrue(m.check_body_limit())

    def testCheckAllRules(self):
-        m = GitCommitMessage().parse_lines(['This is a subject line', '', 'A short body line'])
+        m = GitCommitMessage().parse_lines(
+            ["This is a subject line", "", "A short body line"]
+        )
        self.assertEqual(0, m.check_the_seven_rules())

+
 if __name__ == "__main__":
    unittest.main()
--- a/scripts/test_memory_spikes.py
+++ b/scripts/test_memory_spikes.py
@ -10,9 +10,9 @@ import time
 import sys
 from datetime import datetime

-DEFAULT_MEMCAP = 300 # in MB
-THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike
-WAIT_TO_STABILIZE = 15 # wait in seconds before considering memory stable
+DEFAULT_MEMCAP = 300  # in MB
+THRESHOLD_RATIO = 1.5  # ratio above which considered memory spike
+WAIT_TO_STABILIZE = 15  # wait in seconds before considering memory stable
 CHECK_INTERVAL = 15
 DEBUG = False

@ -25,6 +25,7 @@ def find_procs_by_name(name):
            ls.append(p)
    return ls

+
 # return human readable form of number of bytes n
 def bytes2human(n):
    # http://code.activestate.com/recipes/578019
@ -32,80 +33,95 @@ def bytes2human(n):
    # '9.8K'
    # >>> bytes2human(100001221)
    # '95.4M'
-    symbols = ('K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
+    symbols = ("K", "M", "G", "T", "P", "E", "Z", "Y")
    prefix = {}
    for i, s in enumerate(symbols):
        prefix[s] = 1 << (i + 1) * 10
    for s in reversed(symbols):
        if n >= prefix[s]:
            value = float(n) / prefix[s]
-            return '%.1f%s' % (value, s)
-    return "%sB" % n
+            return f"{value:.1f}{s}"
+    return f"{n}B"
+

 # prints pid of processes
 def process_details(process):
-  return "{} {}".format(process.pid, ''.join(process.cmdline()).strip())
+    return f"{process.pid} {''.join(process.cmdline()).strip()}"
+

 def process_stats():
-    processes = find_procs_by_name('postgres')
+    processes = find_procs_by_name("postgres")
    for p in processes:
        print(p, p.num_ctx_switches(), p.cpu_times(), p.memory_info(), flush=True)

+
 # return process id of new postgres process created when running SQL file
 def find_new_process():
    # get postgres processes that are running before insertion starts
-    base_process = find_procs_by_name('postgres')
-    print('Processes running before inserts run: ')
+    base_process = find_procs_by_name("postgres")
+    print("Processes running before inserts run: ")
    for p in base_process:
        print(process_details(p))

    process_count = len(base_process)

-    print("Waiting {} seconds for process running inserts to start".format(WAIT_TO_STABILIZE), flush=True)
-    time.sleep(WAIT_TO_STABILIZE) # wait WAIT_TO_STABILIZE seconds to get process that runs the inserts
+    print(
+        f"Waiting {WAIT_TO_STABILIZE} seconds for process running inserts to start",
+        flush=True,
+    )
+    time.sleep(
+        WAIT_TO_STABILIZE
+    )  # wait WAIT_TO_STABILIZE seconds to get process that runs the inserts

    # continuously check for creation of new postgres process
    timeout = time.time() + 60
    while True:
        # prevent infinite loop
        if time.time() > timeout:
-            print('Timed out on finding new process, should force quit SQL inserts', flush=True)
+            print(
+                "Timed out on finding new process, should force quit SQL inserts",
+                flush=True,
+            )
            sys.exit(4)

-        process = find_procs_by_name('postgres')
+        process = find_procs_by_name("postgres")
        cnt = len(process)
        print("process count ", cnt)
        if cnt > process_count:
-            process = find_procs_by_name('postgres')
+            process = find_procs_by_name("postgres")
            difference_set = set(process) - set(base_process)
            new_process = None
            # We assume that the backend is the first 'new' process to start, so it will have
            # the lower PID
            for p in difference_set:
-                print('found process: {}'.format(process_details(p)))
+                print(f"found process: {process_details(p)}")
                if new_process is None or p.pid < new_process.pid:
                    new_process = p
-            print('new_process: {}'.format(process_details(new_process)))
+            print(f"new_process: {process_details(new_process)}")
            return new_process.pid
        time.sleep(1)

+
 def main():
    # get process created from running insert test sql file
    pid = find_new_process()
    p = psutil.Process(pid)
    print('*** Check this pid is the same as "pg_backend_pid" from SQL command ***')
-    print('New process backend process:', pid)
+    print("New process backend process:", pid)

-    print('Waiting {} seconds for memory consumption to stabilize'.format(WAIT_TO_STABILIZE), flush=True)
+    print(
+        f"Waiting {WAIT_TO_STABILIZE} seconds for memory consumption to stabilize",
+        flush=True,
+    )
    time.sleep(WAIT_TO_STABILIZE)

    # Calculate average memory consumption from 5 values over 15 seconds
-    sum = 0
-    for i in range (0, 5):
-        sum += p.memory_info().rss
+    total = 0
+    for _ in range(0, 5):
+        total += p.memory_info().rss
        time.sleep(3)
-    avg = sum / 5
-    print('Average memory consumption: ', bytes2human(avg), flush=True)
+    avg = total / 5
+    print("Average memory consumption: ", bytes2human(avg), flush=True)

    cap = int(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MEMCAP) * 1024 * 1024
    upper_threshold = min(cap, avg * THRESHOLD_RATIO)
@ -118,27 +134,28 @@ def main():
            break
        # prevent infinite loop
        if time.time() > timeout:
-            print('Timed out on running inserts (over 45 mins)')
-            print('Killing postgres process')
+            print("Timed out on running inserts (over 45 mins)")
+            print("Killing postgres process")
            p.kill()
            sys.exit(4)

        rss = p.memory_info().rss
        stamp = datetime.now().strftime("%H:%M:%S")
-        print('{} Memory used by process {}: {}'.format(stamp, p.pid, bytes2human(rss)), flush=True)
+        print(f"{stamp} Memory used by process {p.pid}: {bytes2human(rss)}", flush=True)
        if DEBUG:
            process_stats()

        # exit with error if memory above threshold
        if rss > upper_threshold:
-            print('Memory consumption exceeded upper threshold')
-            print('Killing postgres process', flush=True)
+            print("Memory consumption exceeded upper threshold")
+            print("Killing postgres process", flush=True)
            p.kill()
            sys.exit(4)
        time.sleep(CHECK_INTERVAL)

-    print('No memory leaks detected', flush=True)
-    sys.exit(0) # success
+    print("No memory leaks detected", flush=True)
+    sys.exit(0)  # success

-if __name__ == '__main__':
+
+if __name__ == "__main__":
    main()