From 21a3f8206c0de98932867096637c7d1e3d04d925 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 27 Jan 2023 18:46:09 +0400 Subject: [PATCH] Run python linter and formatter in CI Helps find some errors and cosmetic problems. --- .github/gh_matrix_builder.py | 43 +++++++-- .github/workflows/code_style.yaml | 16 +++- scripts/githooks/commit_msg.py | 129 +++++++++++++++------------ scripts/githooks/commit_msg_tests.py | 73 ++++++++++----- scripts/test_memory_spikes.py | 81 ++++++++++------- 5 files changed, 222 insertions(+), 120 deletions(-) diff --git a/.github/gh_matrix_builder.py b/.github/gh_matrix_builder.py index 349d3a0d8..b70624a98 100755 --- a/.github/gh_matrix_builder.py +++ b/.github/gh_matrix_builder.py @@ -136,6 +136,7 @@ def macos_config(overrides): base_config.update(overrides) return base_config + # common installcheck_args for all pg15 tests # dist_move_chunk is skipped due to #4972 # partialize_finalize is ignored due to #4937 @@ -147,12 +148,22 @@ m["include"].append( build_debug_config({"pg": PG13_LATEST, "cc": "clang-14", "cxx": "clang++-14"}) ) m["include"].append(build_debug_config({"pg": PG14_LATEST})) -m["include"].append(build_debug_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args})) +m["include"].append( + build_debug_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}) +) # test latest postgres release in MacOS -m["include"].append(build_release_config(macos_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}))) +m["include"].append( + build_release_config( + macos_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args}) + ) +) # test latest postgres release without telemetry -m["include"].append(build_without_telemetry({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args})) +m["include"].append( + build_without_telemetry( + {"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args} + ) +) # if this is not a pull request e.g. a scheduled run or a push # to a specific branch like prerelease_test we add additional @@ -193,7 +204,11 @@ if event_type != "pull_request": ) # add debug test for first supported PG15 version - m["include"].append(build_debug_config({"pg": PG15_EARLIEST, "installcheck_args": pg15_installcheck_args})) + m["include"].append( + build_debug_config( + {"pg": PG15_EARLIEST, "installcheck_args": pg15_installcheck_args} + ) + ) # add debug test for MacOS m["include"].append(build_debug_config(macos_config({}))) @@ -202,7 +217,11 @@ if event_type != "pull_request": m["include"].append(build_release_config({"pg": PG12_LATEST})) m["include"].append(build_release_config({"pg": PG13_LATEST})) m["include"].append(build_release_config({"pg": PG14_LATEST})) - m["include"].append(build_release_config({"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args})) + m["include"].append( + build_release_config( + {"pg": PG15_LATEST, "installcheck_args": pg15_installcheck_args} + ) + ) # add apache only test for latest pg m["include"].append(build_apache_config({"pg": PG12_LATEST})) @@ -223,7 +242,15 @@ if event_type != "pull_request": } ) ) - m["include"].append(build_debug_config({"pg": 15, "snapshot": "snapshot", "installcheck_args": pg15_installcheck_args})) + m["include"].append( + build_debug_config( + { + "pg": 15, + "snapshot": "snapshot", + "installcheck_args": pg15_installcheck_args, + } + ) + ) else: # Check if we need to check for the flaky tests. Determine which test files # have been changed in the PR. The sql files might include other files that @@ -267,7 +294,9 @@ else: tests.add(name) else: # Should've been filtered out above. - print(f"unknown extension '{ext}' for test output file '{f}'", file=sys.stderr) + print( + f"unknown extension '{ext}' for test output file '{f}'", file=sys.stderr + ) sys.exit(1) if tests: diff --git a/.github/workflows/code_style.yaml b/.github/workflows/code_style.yaml index 737e23498..b450fed3a 100644 --- a/.github/workflows/code_style.yaml +++ b/.github/workflows/code_style.yaml @@ -1,5 +1,5 @@ name: Code style -on: +"on": push: branches: - main @@ -63,6 +63,20 @@ jobs: ./scripts/clang_format_all.sh git diff --exit-code + python_checks: + name: Check Python code in tree + runs-on: ubuntu-latest + steps: + - name: Install prerequisites + run: | + pip install black prospector psutil + - name: Checkout source + uses: actions/checkout@v3 + - name: Run prospector + run: | + find . -type f -name "*.py" -print -exec prospector {} + -exec black {} + + git diff --exit-code + misc_checks: name: Check license, update scripts, and git hooks runs-on: ubuntu-22.04 diff --git a/scripts/githooks/commit_msg.py b/scripts/githooks/commit_msg.py index 27c261b10..98da22387 100755 --- a/scripts/githooks/commit_msg.py +++ b/scripts/githooks/commit_msg.py @@ -4,28 +4,33 @@ # https://chris.beams.io/posts/git-commit/ import sys + class GitCommitMessage: - 'Represents a parsed Git commit message' + "Represents a parsed Git commit message" - rules = [ 'Separate subject from body with a blank line', - 'Limit the subject line to 50 characters', - 'Capitalize the subject line', - 'Do not end the subject line with a period', - 'Use the imperative mood in the subject line', - 'Wrap the body at 72 characters', - 'Use the body to explain what and why vs. how' ] + rules = [ + "Separate subject from body with a blank line", + "Limit the subject line to 50 characters", + "Capitalize the subject line", + "Do not end the subject line with a period", + "Use the imperative mood in the subject line", + "Wrap the body at 72 characters", + "Use the body to explain what and why vs. how", + ] - valid_rules = [ False, False, False, False, False, False, False ] + valid_rules = [False, False, False, False, False, False, False] - def __init__(self, filename = None): + def __init__(self, filename=None): lines = [] - if filename != None: - with open(filename, 'r') as f: + if filename is not None: + with open(filename, "r", encoding="utf-8") as f: for line in f: - if line.startswith('# ------------------------ >8 ------------------------'): + if line.startswith( + "# ------------------------ >8 ------------------------" + ): break - if not line.startswith('#'): + if not line.startswith("#"): lines.append(line) self.parse_lines(lines) @@ -42,7 +47,7 @@ class GitCommitMessage: self.has_subject_body_separator = False if len(lines) > 1: - self.has_subject_body_separator = (len(lines[1].strip()) == 0) + self.has_subject_body_separator = len(lines[1].strip()) == 0 if self.has_subject_body_separator: self.body_lines = lines[2:] @@ -52,54 +57,58 @@ class GitCommitMessage: return self def check_subject_body_separtor(self): - 'Rule 1: Separate subject from body with a blank line' + "Rule 1: Separate subject from body with a blank line" if len(self.body_lines) > 0: return self.has_subject_body_separator return True def check_subject_limit(self): - 'Rule 2: Limit the subject line to 50 characters' + "Rule 2: Limit the subject line to 50 characters" return len(self.subject.rstrip("\n")) <= 50 def check_subject_capitalized(self): - 'Rule 3: Capitalize the subject line' + "Rule 3: Capitalize the subject line" return len(self.subject) > 0 and self.subject[0].isupper() def check_subject_no_period(self): - 'Rule 4: Do not end the subject line with a period' - return not self.subject.endswith('.') + "Rule 4: Do not end the subject line with a period" + return not self.subject.endswith(".") - common_first_words = [ 'Add', - 'Adjust', - 'Support', - 'Change', - 'Remove', - 'Fix', - 'Print', - 'Track', - 'Refactor', - 'Combine', - 'Release', - 'Set', - 'Stop', - 'Make', - 'Mark', - 'Enable', - 'Check', - 'Exclude', - 'Format', - 'Correct'] + common_first_words = [ + "Add", + "Adjust", + "Support", + "Change", + "Remove", + "Fix", + "Print", + "Track", + "Refactor", + "Combine", + "Release", + "Set", + "Stop", + "Make", + "Mark", + "Enable", + "Check", + "Exclude", + "Format", + "Correct", + ] def check_subject_imperative(self): - 'Rule 5: Use the imperative mood in the subject line.' - 'We can only check for common mistakes here, like using' - 'the -ing form of a verb or non-imperative version of ' - 'common verbs' + """Rule 5: Use the imperative mood in the subject line. + + We can only check for common mistakes here, like using + the -ing form of a verb or non-imperative version of + common verbs + """ firstword = self.subject_words[0] - if firstword.endswith('ing'): + if firstword.endswith("ing"): return False for word in self.common_first_words: @@ -109,7 +118,7 @@ class GitCommitMessage: return True def check_body_limit(self): - 'Rule 6: Wrap the body at 72 characters' + "Rule 6: Wrap the body at 72 characters" if len(self.body_lines) == 0: return True @@ -121,7 +130,7 @@ class GitCommitMessage: return True def check_body_uses_why(self): - 'Rule 7: Use the body to explain what and why vs. how' + "Rule 7: Use the body to explain what and why vs. how" # Not enforcable return True @@ -132,15 +141,15 @@ class GitCommitMessage: check_subject_no_period, check_subject_imperative, check_body_limit, - check_body_uses_why ] + check_body_uses_why, + ] def check_the_seven_rules(self): - 'validates the commit message against the seven rules' + "validates the commit message against the seven rules" num_violations = 0 - for i in range(len(self.rule_funcs)): - func = self.rule_funcs[i] + for i, func in enumerate(self.rule_funcs): res = func(self) self.valid_rules[i] = res @@ -151,22 +160,26 @@ class GitCommitMessage: print() print("**** WARNING ****") print() - print("The commit message does not seem to comply with the project's guidelines.") - print("Please try to follow the \"Seven rules of a great commit message\":") + print( + "The commit message does not seem to comply with the project's guidelines." + ) + print('Please try to follow the "Seven rules of a great commit message":') print("https://chris.beams.io/posts/git-commit/") print() print("The following rules are violated:\n") for i in range(len(self.rule_funcs)): if not self.valid_rules[i]: - print(f"\t* Rule {i+1}: \"{self.rules[i]}\"") + print(f'\t* Rule {i+1}: "{self.rules[i]}"') # Extra sanity checks beyond the seven rules if len(self.body_lines) == 0: print() print("NOTE: the commit message has no body.") print("It is recommended to add a body with a description of your") - print("changes, even if they are small. Explain what and why instead of how:") + print( + "changes, even if they are small. Explain what and why instead of how:" + ) print("https://chris.beams.io/posts/git-commit/#why-not-how") if len(self.subject_words) < 3: @@ -182,18 +195,20 @@ class GitCommitMessage: return num_violations + def main(): if len(sys.argv) != 2: print("Unexpected number of arguments") - exit(1) + sys.exit(1) msg = GitCommitMessage(sys.argv[1]) return msg.check_the_seven_rules() + if __name__ == "__main__": main() # Always exit with success. We could also fail the commit if with # a non-zero exit code, but that might be a bit excessive and we'd # have to save the failed commit message to a file so that it can # be recovered. - exit(0) + sys.exit(0) diff --git a/scripts/githooks/commit_msg_tests.py b/scripts/githooks/commit_msg_tests.py index cb56ffc0e..b26243277 100755 --- a/scripts/githooks/commit_msg_tests.py +++ b/scripts/githooks/commit_msg_tests.py @@ -3,8 +3,8 @@ import unittest from commit_msg import GitCommitMessage -class TestCommitMsg(unittest.TestCase): +class TestCommitMsg(unittest.TestCase): def testNoInput(self): m = GitCommitMessage().parse_lines([]) self.assertEqual(len(m.body_lines), 0) @@ -12,64 +12,91 @@ class TestCommitMsg(unittest.TestCase): self.assertEqual(len(m.body_lines), 0) def testParsing(self): - m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'body line 1', 'body line 2']) - self.assertEqual(m.subject, 'This is a subject line') + m = GitCommitMessage().parse_lines( + ["This is a subject line", " ", "body line 1", "body line 2"] + ) + self.assertEqual(m.subject, "This is a subject line") self.assertTrue(m.has_subject_body_separator) - self.assertEqual(m.body_lines[0], 'body line 1') - self.assertEqual(m.body_lines[1], 'body line 2') + self.assertEqual(m.body_lines[0], "body line 1") + self.assertEqual(m.body_lines[1], "body line 2") def testNonImperative(self): - m = GitCommitMessage().parse_lines(['Adds new file']) + m = GitCommitMessage().parse_lines(["Adds new file"]) self.assertFalse(m.check_subject_imperative()) - m.parse_lines(['Adding new file']) + m.parse_lines(["Adding new file"]) self.assertFalse(m.check_subject_imperative()) # Default to accept - m.parse_lines(['Foo bar']) + m.parse_lines(["Foo bar"]) self.assertTrue(m.check_subject_imperative()) def testSubjectBodySeparator(self): - m = GitCommitMessage().parse_lines(['This is a subject line']) + m = GitCommitMessage().parse_lines(["This is a subject line"]) self.assertTrue(m.check_subject_body_separtor()) - m = GitCommitMessage().parse_lines(['This is a subject line', 'body']) + m = GitCommitMessage().parse_lines(["This is a subject line", "body"]) self.assertFalse(m.check_subject_body_separtor()) - m = GitCommitMessage().parse_lines(['This is a subject line', '', 'body']) + m = GitCommitMessage().parse_lines(["This is a subject line", "", "body"]) self.assertTrue(m.check_subject_body_separtor()) - m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'body']) + m = GitCommitMessage().parse_lines(["This is a subject line", " ", "body"]) self.assertTrue(m.check_subject_body_separtor()) def testSubjectLimit(self): - m = GitCommitMessage().parse_lines(['This subject line is exactly 48 characters long']) + m = GitCommitMessage().parse_lines( + ["This subject line is exactly 48 characters long"] + ) self.assertTrue(m.check_subject_limit()) - m = GitCommitMessage().parse_lines(['This is a very long subject line that will obviously exceed the limit']) + m = GitCommitMessage().parse_lines( + ["This is a very long subject line that will obviously exceed the limit"] + ) self.assertFalse(m.check_subject_limit()) - m = GitCommitMessage().parse_lines(['This 50-character subject line ends with an LF EOL\n']) + m = GitCommitMessage().parse_lines( + ["This 50-character subject line ends with an LF EOL\n"] + ) self.assertTrue(m.check_subject_limit()) def testSubjectCapitalized(self): - m = GitCommitMessage().parse_lines(['This subject line is capitalized']) + m = GitCommitMessage().parse_lines(["This subject line is capitalized"]) self.assertTrue(m.check_subject_capitalized()) - m = GitCommitMessage().parse_lines(['this subject line is not capitalized']) + m = GitCommitMessage().parse_lines(["this subject line is not capitalized"]) self.assertFalse(m.check_subject_capitalized()) def testSubjectNoPeriod(self): - m = GitCommitMessage().parse_lines(['This subject line ends with a period.']) + m = GitCommitMessage().parse_lines(["This subject line ends with a period."]) self.assertFalse(m.check_subject_no_period()) - m = GitCommitMessage().parse_lines(['This subject line does not end with a period']) + m = GitCommitMessage().parse_lines( + ["This subject line does not end with a period"] + ) self.assertTrue(m.check_subject_no_period()) def testBodyLimit(self): - m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'A short body line']) + m = GitCommitMessage().parse_lines( + ["This is a subject line", " ", "A short body line"] + ) self.assertTrue(m.check_body_limit()) - m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'A very long body line which certainly exceeds the 72 char recommended limit']) + m = GitCommitMessage().parse_lines( + [ + "This is a subject line", + " ", + "A very long body line which certainly exceeds the 72 char recommended limit", + ] + ) self.assertFalse(m.check_body_limit()) - m = GitCommitMessage().parse_lines(['This is a subject line', ' ', 'A body line with exactly 72 characters, followed by an EOL (Unix-style).\n']) + m = GitCommitMessage().parse_lines( + [ + "This is a subject line", + " ", + "A body line with exactly 72 characters, followed by an EOL (Unix-style).\n", + ] + ) self.assertTrue(m.check_body_limit()) def testCheckAllRules(self): - m = GitCommitMessage().parse_lines(['This is a subject line', '', 'A short body line']) + m = GitCommitMessage().parse_lines( + ["This is a subject line", "", "A short body line"] + ) self.assertEqual(0, m.check_the_seven_rules()) + if __name__ == "__main__": unittest.main() diff --git a/scripts/test_memory_spikes.py b/scripts/test_memory_spikes.py index 193ac68c2..2fb65a4d3 100644 --- a/scripts/test_memory_spikes.py +++ b/scripts/test_memory_spikes.py @@ -10,9 +10,9 @@ import time import sys from datetime import datetime -DEFAULT_MEMCAP = 300 # in MB -THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike -WAIT_TO_STABILIZE = 15 # wait in seconds before considering memory stable +DEFAULT_MEMCAP = 300 # in MB +THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike +WAIT_TO_STABILIZE = 15 # wait in seconds before considering memory stable CHECK_INTERVAL = 15 DEBUG = False @@ -25,6 +25,7 @@ def find_procs_by_name(name): ls.append(p) return ls + # return human readable form of number of bytes n def bytes2human(n): # http://code.activestate.com/recipes/578019 @@ -32,80 +33,95 @@ def bytes2human(n): # '9.8K' # >>> bytes2human(100001221) # '95.4M' - symbols = ('K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y') + symbols = ("K", "M", "G", "T", "P", "E", "Z", "Y") prefix = {} for i, s in enumerate(symbols): prefix[s] = 1 << (i + 1) * 10 for s in reversed(symbols): if n >= prefix[s]: value = float(n) / prefix[s] - return '%.1f%s' % (value, s) - return "%sB" % n + return f"{value:.1f}{s}" + return f"{n}B" + # prints pid of processes def process_details(process): - return "{} {}".format(process.pid, ''.join(process.cmdline()).strip()) + return f"{process.pid} {''.join(process.cmdline()).strip()}" + def process_stats(): - processes = find_procs_by_name('postgres') + processes = find_procs_by_name("postgres") for p in processes: print(p, p.num_ctx_switches(), p.cpu_times(), p.memory_info(), flush=True) + # return process id of new postgres process created when running SQL file def find_new_process(): # get postgres processes that are running before insertion starts - base_process = find_procs_by_name('postgres') - print('Processes running before inserts run: ') + base_process = find_procs_by_name("postgres") + print("Processes running before inserts run: ") for p in base_process: print(process_details(p)) process_count = len(base_process) - print("Waiting {} seconds for process running inserts to start".format(WAIT_TO_STABILIZE), flush=True) - time.sleep(WAIT_TO_STABILIZE) # wait WAIT_TO_STABILIZE seconds to get process that runs the inserts + print( + f"Waiting {WAIT_TO_STABILIZE} seconds for process running inserts to start", + flush=True, + ) + time.sleep( + WAIT_TO_STABILIZE + ) # wait WAIT_TO_STABILIZE seconds to get process that runs the inserts # continuously check for creation of new postgres process timeout = time.time() + 60 while True: # prevent infinite loop if time.time() > timeout: - print('Timed out on finding new process, should force quit SQL inserts', flush=True) + print( + "Timed out on finding new process, should force quit SQL inserts", + flush=True, + ) sys.exit(4) - process = find_procs_by_name('postgres') + process = find_procs_by_name("postgres") cnt = len(process) print("process count ", cnt) if cnt > process_count: - process = find_procs_by_name('postgres') + process = find_procs_by_name("postgres") difference_set = set(process) - set(base_process) new_process = None # We assume that the backend is the first 'new' process to start, so it will have # the lower PID for p in difference_set: - print('found process: {}'.format(process_details(p))) + print(f"found process: {process_details(p)}") if new_process is None or p.pid < new_process.pid: new_process = p - print('new_process: {}'.format(process_details(new_process))) + print(f"new_process: {process_details(new_process)}") return new_process.pid time.sleep(1) + def main(): # get process created from running insert test sql file pid = find_new_process() p = psutil.Process(pid) print('*** Check this pid is the same as "pg_backend_pid" from SQL command ***') - print('New process backend process:', pid) + print("New process backend process:", pid) - print('Waiting {} seconds for memory consumption to stabilize'.format(WAIT_TO_STABILIZE), flush=True) + print( + f"Waiting {WAIT_TO_STABILIZE} seconds for memory consumption to stabilize", + flush=True, + ) time.sleep(WAIT_TO_STABILIZE) # Calculate average memory consumption from 5 values over 15 seconds - sum = 0 - for i in range (0, 5): - sum += p.memory_info().rss + total = 0 + for _ in range(0, 5): + total += p.memory_info().rss time.sleep(3) - avg = sum / 5 - print('Average memory consumption: ', bytes2human(avg), flush=True) + avg = total / 5 + print("Average memory consumption: ", bytes2human(avg), flush=True) cap = int(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MEMCAP) * 1024 * 1024 upper_threshold = min(cap, avg * THRESHOLD_RATIO) @@ -118,27 +134,28 @@ def main(): break # prevent infinite loop if time.time() > timeout: - print('Timed out on running inserts (over 45 mins)') - print('Killing postgres process') + print("Timed out on running inserts (over 45 mins)") + print("Killing postgres process") p.kill() sys.exit(4) rss = p.memory_info().rss stamp = datetime.now().strftime("%H:%M:%S") - print('{} Memory used by process {}: {}'.format(stamp, p.pid, bytes2human(rss)), flush=True) + print(f"{stamp} Memory used by process {p.pid}: {bytes2human(rss)}", flush=True) if DEBUG: process_stats() # exit with error if memory above threshold if rss > upper_threshold: - print('Memory consumption exceeded upper threshold') - print('Killing postgres process', flush=True) + print("Memory consumption exceeded upper threshold") + print("Killing postgres process", flush=True) p.kill() sys.exit(4) time.sleep(CHECK_INTERVAL) - print('No memory leaks detected', flush=True) - sys.exit(0) # success + print("No memory leaks detected", flush=True) + sys.exit(0) # success -if __name__ == '__main__': + +if __name__ == "__main__": main()