#!/usr/bin/env python3
"""Create automated backport pull requests for fixes merged into main.

The script compares main with the branch of the previous version, picks the
pull requests that are labeled for backporting (directly or through the issue
they close), cherry-picks their commits onto the previous-version branch and
opens a backport pull request for each of them.

Configuration is read from the environment: GITHUB_TOKEN, GITHUB_REPOSITORY
(set by Github Actions), and, for manual debugging runs, BACKPORT_TARGET_REPO
and BACKPORT_TARGET_REMOTE.
"""

import os
import re
import string
import subprocess
import sys

from github import Github  # This is PyGithub.
import requests

# Limit our history search and fetch depth to this value, not to get stuck in
# case of a bug.
HISTORY_DEPTH = 1000


def run_query(query):
    """Run a GraphQL query against the GitHub API using requests.post.

    Args:
        query: The GraphQL document, as a string.

    Returns:
        The decoded JSON response.

    Raises:
        ValueError: If the HTTP status is not 200 or the response contains
            an "errors" entry.
    """
    request = requests.post(
        "https://api.github.com/graphql",
        json={"query": query},
        headers={"Authorization": f'Bearer {os.environ.get("GITHUB_TOKEN")}'},
        timeout=20,
    )
    response = request.json()

    # Have to work around the unique GraphQL convention of returning 200 for errors.
    if request.status_code != 200 or "errors" in response:
        raise ValueError(
            f"Query failed to run by returning code of {request.status_code}."
            f"\nQuery: '{query}'"
            f"\nResponse: '{response}'"
        )

    return response


def get_referenced_issue(pr_number):
    """Get the issue fixed by the given pull request of the source repository.

    Only the first closing issue reference is requested from the API.

    Args:
        pr_number: Number of the pull request in the repository named by the
            module-level `source_repo_name`.

    Returns:
        A tuple (number, title, labels) describing the issue, where labels is
        a set of label names. Returns (None, None, None) if the pull request
        closes no issue, or the referenced issue is not accessible.
    """
    # The PR number comes from the source repository, so query that repository.
    owner, name = source_repo_name.split("/")

    # We only need the first issue here. We also request only the first 30 labels,
    # because GitHub requires some small restriction there that is counted
    # towards the GraphQL API usage quota.
    ref_result = run_query(
        string.Template(
            """
            query {
              repository(owner: "$owner", name: "$name") {
                pullRequest(number: $pr_number) {
                  closingIssuesReferences(first: 1) {
                    nodes {
                      number, title,
                      labels (first: 30) { nodes { name } }
                    }
                  }
                }
              }
            }
            """
        ).substitute({"pr_number": pr_number, "owner": owner, "name": name})
    )

    # The above returns:
    # {'data': {'repository': {'pullRequest': {'closingIssuesReferences': {'nodes': [{'number': 6819,
    #      'title': '[Bug]: Segfault when `ts_insert_blocker` function is called',
    #      'labels': {'nodes': [{'name': 'bug'}]}}]}}}}}
    #
    # We can have {'nodes': [None]} in case it references an inaccessible repository,
    # just ignore it.
    ref_nodes = ref_result["data"]["repository"]["pullRequest"][
        "closingIssuesReferences"
    ]["nodes"]

    if not ref_nodes or len(ref_nodes) != 1 or not ref_nodes[0]:
        return None, None, None

    number = ref_nodes[0]["number"]
    title = ref_nodes[0]["title"]
    labels = {x["name"] for x in ref_nodes[0]["labels"]["nodes"]}

    return number, title, labels


def set_auto_merge(pr_number):
    """Enable auto-merge (rebase method) for the given PR in the target repository.

    Args:
        pr_number: Number of the pull request in the repository named by the
            module-level `target_repo_name`.

    Raises:
        ValueError: If one of the GraphQL requests fails (see run_query).
    """
    owner, name = target_repo_name.split("/")

    # We first have to find out the PR id, which is some base64 string, different
    # from its number.
    query = string.Template(
        """query {
          repository(owner: "$owner", name: "$name") {
            pullRequest(number: $pr_number) {
              id
            }
          }
        }"""
    ).substitute(pr_number=pr_number, owner=owner, name=name)
    result = run_query(query)
    pr_id = result["data"]["repository"]["pullRequest"]["id"]

    query = string.Template(
        """mutation {
          enablePullRequestAutoMerge(
            input: {
              pullRequestId: "$pr_id",
              mergeMethod: REBASE
            }
          ) {
            clientMutationId
          }
        }"""
    ).substitute(pr_id=pr_id)
    run_query(query)


# NOTE(review): the helpers below pass the command through the shell on
# purpose, because some callers rely on shell redirections such as
# `> /dev/null 2>&1`. The interpolated values come from the environment and
# from git itself; do not pass untrusted text to them.
def git_output(command):
    """Get output from the git command, checking for the successful exit code.

    Raises subprocess.CalledProcessError on a non-zero exit code.
    """
    return subprocess.check_output(f"git {command}", shell=True, text=True)


def git_check(command):
    """Run a git command, checking for the successful exit code.

    Raises subprocess.CalledProcessError on a non-zero exit code.
    """
    subprocess.run(f"git {command}", shell=True, check=True)


def git_returncode(command):
    """Run a git command, returning the exit code instead of raising."""
    return subprocess.run(f"git {command}", shell=True, check=False).returncode


# The token has to have the "access public repositories" permission, or else creating a PR returns 404.
github = Github(os.environ.get("GITHUB_TOKEN"))

# If we are running inside Github Action, will modify the main repo.
source_remote = "origin"
source_repo_name = os.environ.get("GITHUB_REPOSITORY")
target_remote = source_remote
target_repo_name = source_repo_name

if not source_repo_name:
    # We are running manually for debugging, probably want to modify a fork.
    source_repo_name = "timescale/timescaledb"
    target_repo_name = os.environ.get("BACKPORT_TARGET_REPO")
    target_remote = os.environ.get("BACKPORT_TARGET_REMOTE")
    if not target_repo_name or not target_remote:
        print(
            "Please specify the target repositories for debugging, using the "
            "environment variables BACKPORT_TARGET_REPO (e.g. `timescale/timescaledb`) "
            "and BACKPORT_TARGET_REMOTE (e.g. `origin`).",
            file=sys.stderr,
        )
        sys.exit(1)

print(
    f"Will look at '{source_repo_name}' (git remote '{source_remote}') for bug fixes, "
    f"and create the backport PRs in '{target_repo_name}' (git remote '{target_remote}')."
)

source_repo = github.get_repo(source_repo_name)
target_repo = github.get_repo(target_repo_name)

# Set git name and email corresponding to the token user. The display name is
# optional on Github, and the environment only accepts strings, so fall back
# to the login when the name is not set.
token_user = github.get_user()
os.environ["GIT_COMMITTER_NAME"] = token_user.name or token_user.login

# This is an email that is used by Github when you opt to hide your real email
# address. It is required so that the commits are recognized by Github as made
# by the user. That is, if you use a wrong e-mail, there won't be a clickable
# profile picture next to the commit in the Github interface.
os.environ[
    "GIT_COMMITTER_EMAIL"
] = f"{token_user.id}+{token_user.login}@users.noreply.github.com"
print(
    f"Will commit as {os.environ['GIT_COMMITTER_NAME']} <{os.environ['GIT_COMMITTER_EMAIL']}>"
)

# Fetch the main branch. Apparently the local repo can be shallow in some cases
# in Github Actions, so specify the depth. --unshallow will complain on normal
# repositories, this is why we don't use it here.
git_check(
    f"fetch --quiet --depth={HISTORY_DEPTH} {source_remote} main:refs/remotes/{source_remote}/main"
)

# Find out what is the branch corresponding to the previous version compared to
# main. We will backport to that branch.
# Parse the `key = value` lines of version.config as found in main. Lines that
# do not have this shape (blank lines, for instance) are skipped instead of
# crashing on a failed match.
version_config = {}
for line in git_output(f"show {source_remote}/main:version.config").splitlines():
    config_match = re.match(r"^(.+)\s+=\s+(.+)$", line)
    if config_match:
        version_config[config_match.group(1)] = config_match.group(2)

# The backport branch is named after the previous version with the last
# component replaced by "x", e.g. previous version 1.2.3 gives branch 1.2.x.
previous_version = version_config["update_from_version"]
previous_version_parts = previous_version.split(".")
previous_version_parts[-1] = "x"
backport_target = ".".join(previous_version_parts)
backported_label = f"backported-{backport_target}"

print(f"Will backport to {backport_target}.")

# Fetch the target branch. Apparently the local repo can be shallow in some cases
# in Github Actions, so specify the depth. --unshallow will complain on normal
# repositories, this is why we don't use it here.
git_check(
    f"fetch --quiet --depth={HISTORY_DEPTH} {target_remote} {backport_target}:refs/remotes/{target_remote}/{backport_target}"
)

# Also fetch all branches from the target repository, because we use the presence
# of the backport branches to determine that a backport exists. It's not convenient
# to query for branch existence through the PyGithub API.
git_check(f"fetch {target_remote}")

# Find out which commits are unique to main and target branch. Also build sets of
# the titles of these commits. We will compare the titles to check whether a
# commit was backported.
# Each log line is "<abbreviated hash><TAB><subject>". Split on the first tab
# only, so that every entry has exactly two fields even if the subject itself
# contains a tab.
main_commits = [
    line.split("\t", 1)
    for line in git_output(
        f'log -{HISTORY_DEPTH} --pretty="format:%h\t%s" {target_remote}/{backport_target}..{source_remote}/main'
    ).splitlines()
    if line
]

print(f"Have {len(main_commits)} new commits in the main branch.")

branch_commits = [
    line.split("\t", 1)
    for line in git_output(
        f'log -{HISTORY_DEPTH} --pretty="format:%h\t%s" {source_remote}/main..{target_remote}/{backport_target}'
    ).splitlines()
    if line
]
branch_commit_titles = {x[1] for x in branch_commits}

# We will do backports per-PR, because one PR, though not often, might contain
# many commits. So as the first step, go through the commits unique to main, find
# out which of them have to be backported, and remember the corresponding PRs.
# We also have to remember which commits to backport. The list from PR itself is
# not what we need, these are the original commits from the PR branch, and we
# need the resulting commits in main.
class PRInfo:
    """Information about the PR to be backported."""

    def __init__(self, pygithub_pr_, issue_number_):
        # The PyGithub pull request object of the original PR.
        self.pygithub_pr = pygithub_pr_
        # The PyGithub commit objects that resulted from this PR in main, in
        # forward (oldest first) order.
        self.pygithub_commits = []
        # Number of the issue closed by the PR, or None.
        self.issue_number = issue_number_


def should_backport_by_labels(number, title, labels):
    """Should we backport the given PR/issue, judging by the labels?

    Note that this works in ternary logic: True means we must, False means we
    must not (tags to disable backport take precedence), and None means weak
    no (no tags to either request or disable backport).

    Args:
        number: Number of the PR or issue, used only for the log message.
        title: Title of the PR or issue, used only for the log message.
        labels: Set of label names of the PR or issue.
    """
    stopper_labels = labels.intersection(
        ["disable-auto-backport", "auto-backport-not-done"]
    )
    if stopper_labels:
        print(
            f"#{number} '{title}' is labeled as '{next(iter(stopper_labels))}' which prevents automated backporting."
        )
        return False

    force_labels = labels.intersection(["bug", "force-auto-backport"])
    if force_labels:
        print(
            f"#{number} '{title}' is labeled as '{next(iter(force_labels))}' which requests automated backporting."
        )
        return True

    return None


# Go through the commits unique to main, and build a dict(pr number -> PRInfo)
# of PRs that we will consider for backporting.
prs_to_backport = {}
for commit_sha, commit_title in main_commits:
    print()

    pygithub_commit = source_repo.get_commit(sha=commit_sha)

    pulls = pygithub_commit.get_pulls()
    # Check the count explicitly: the paginated list returned by PyGithub is
    # not guaranteed to be falsy when it is empty.
    if not pulls or pulls.totalCount == 0:
        print(f"{commit_sha[:9]} '{commit_title}' does not belong to a PR.")
        continue

    if pulls.totalCount > 1:
        # What would that mean? Just play it safe and skip it.
        print(
            f"{commit_sha[:9]} '{commit_title}' references multiple PRs: {', '.join(str(candidate.number) for candidate in pulls)}"
        )
        continue

    pull = pulls[0]

    # If a commit with the same title is already in the branch, mark the PR with
    # a corresponding tag. This makes it easier to check what was backported
    # when looking at the release milestone. Note that we do this before other
    # checks -- maybe it was backported manually regardless of the usual
    # conditions.
    if commit_title in branch_commit_titles:
        print(f"{commit_sha[:9]} '{commit_title}' is already in the branch.")

        if backported_label not in {label.name for label in pull.labels}:
            pull.add_to_labels(backported_label)

        continue

    # Next, we're going to look at the labels of both the PR and the linked
    # issue, if any, to understand whether we should backport the fix. We have
    # labels to request backport like "bug", and labels to prevent backport
    # like "disable-auto-backport", on both issue and the PR. We're going to use
    # the ternary False/None/True logic to combine them properly.
    issue_number, issue_title, issue_labels = get_referenced_issue(pull.number)
    if not issue_number:
        should_backport_issue_ternary = None
        print(
            f"{commit_sha[:9]} belongs to the PR #{pull.number} '{pull.title}' that does not close an issue."
        )
    else:
        # The number and the title of the issue are already known from the
        # query above, no need to fetch the issue again just to print them.
        should_backport_issue_ternary = should_backport_by_labels(
            issue_number, issue_title, issue_labels
        )
        print(
            f"{commit_sha[:9]} belongs to the PR #{pull.number} '{pull.title}' "
            f"that references the issue #{issue_number} '{issue_title}'."
        )
    pull_labels = {label.name for label in pull.labels}
    should_backport_pr_ternary = should_backport_by_labels(
        pull.number, pull.title, pull_labels
    )

    # We backport if either the PR or the issue labels request the backport, and
    # none of them prevent it. The comparisons are written with `is True` and
    # `is not False` to keep None (weak no) distinct from False (hard no).
    if (
        should_backport_pr_ternary is True or should_backport_issue_ternary is True
    ) and (
        should_backport_pr_ternary is not False
        and should_backport_issue_ternary is not False
    ):
        print(f"{commit_sha[:9]} '{commit_title}' will be considered for backporting.")
    else:
        continue

    # Remember the PR and the corresponding resulting commit in main.
    if pull.number not in prs_to_backport:
        prs_to_backport[pull.number] = PRInfo(pull, issue_number)

    # We're traversing the history backwards, and want to have the list of
    # commits in forward order.
    prs_to_backport[pull.number].pygithub_commits.insert(0, pygithub_commit)


def report_backport_not_done(original_pr, reason, details=None):
    """If something prevents us from backporting the PR automatically,
    report it in a comment to original PR, and add a label preventing
    further attempts.

    Args:
        original_pr: The PyGithub pull request object of the original PR.
        reason: Short reason, used in the log and in the first line of the comment.
        details: Optional longer text appended to the comment.
    """
    print(
        f"Will not backport the PR #{original_pr.number} '{original_pr.title}': {reason}"
    )

    github_comment = f"Automated backport to {backport_target} not done: {reason}."

    if details:
        github_comment += f"\n\n{details}"

    # Link to the job if we're running in the Github Action environment.
    if "GITHUB_REPOSITORY" in os.environ:
        github_comment += (
            "\n\n"
            f"[Job log](https://github.com/{os.environ.get('GITHUB_REPOSITORY')}"
            f"/actions/runs/{os.environ.get('GITHUB_RUN_ID')}"
            f"/attempts/{os.environ.get('GITHUB_RUN_ATTEMPT')})"
        )

    original_pr.create_issue_comment(github_comment)
    original_pr.add_to_labels("auto-backport-not-done")


# Now, go over the list of PRs that we have collected, and try to backport
# each of them.
print(f"Have {len(prs_to_backport)} PRs to backport.")
for index, pr_info in enumerate(prs_to_backport.values()):
    print()

    # Don't want to have an endless loop that modifies the repository in an
    # unattended script. The already backported/conflicted PRs shouldn't even
    # get into this list, so the low number is OK, it will still make progress.
    if index > 5:
        print(f"{index} PRs processed, stopping as a precaution.")
        sys.exit(0)

    original_pr = pr_info.pygithub_pr
    backport_branch = f"backport/{backport_target}/{original_pr.number}"

    # If there is already a backport branch for this PR, this probably means
    # that we already created the backport PR. Skip it.
    if (
        git_returncode(f"rev-parse {target_remote}/{backport_branch} > /dev/null 2>&1")
        == 0
    ):
        print(
            f'Backport branch {backport_branch} for PR #{original_pr.number}: "{original_pr.title}" already exists. Skipping.'
        )
        continue

    # Try to cherry-pick the commits.
    git_check(
        f"checkout --quiet --detach {target_remote}/{backport_target} > /dev/null"
    )

    commit_shas = [commit.sha for commit in pr_info.pygithub_commits]
    if git_returncode(f"cherry-pick --quiet -m 1 -x {' '.join(commit_shas)}") != 0:
        # Capture the status before aborting, so that the comment shows the
        # conflicting files.
        details = f"### Git status\n\n```\n{git_output('status')}\n```"
        git_check("cherry-pick --abort")
        report_backport_not_done(original_pr, "cherry-pick failed", details)
        continue

    # We don't have the permission to modify workflows
    changed_files = {file.filename for file in original_pr.get_files()}
    changed_workflow_files = {
        filename
        for filename in changed_files
        if filename.startswith(".github/workflows/")
    }
    if changed_workflow_files:
        details = (
            f"The PR touches a workflow file '{next(iter(changed_workflow_files))}' "
            " and cannot be backported automatically"
        )
        report_backport_not_done(original_pr, "backport failed", details)
        continue

    # Push the backport branch.
    git_check(f"push --quiet {target_remote} @:refs/heads/{backport_branch}")

    # Prepare description for the backport PR.
    backport_description = (
        f"This is an automated backport of #{original_pr.number}: {original_pr.title}."
    )

    if pr_info.issue_number:
        backport_description += f"\nThe original issue is #{pr_info.issue_number}."

    # Do not merge the PR automatically if it changes some particularly
    # conflict-prone files that are better to review manually. Also mention this
    # in the description.
    stopper_files = changed_files.intersection(
        ["sql/updates/latest-dev.sql", "sql/updates/reverse-dev.sql"]
    )
    if stopper_files:
        backport_description += (
            "\n"
            f"This PR will not be merged automatically, because it modifies '{next(iter(stopper_files))}' "
            "which is conflict-prone. Please review these changes manually."
        )
    else:
        backport_description += (
            "\n"
            "This PR will be merged automatically after all the relevant CI checks pass."
        )

    backport_description += (
        " If this fix should not be backported, or will be backported manually, "
        "just close this PR. You can use the backport branch to add your "
        "changes, it won't be modified automatically anymore."
        "\n"
        "\n"
        "For more details, please see the [documentation]"
        "(https://github.com/timescale/eng-database/wiki/Releasing-TimescaleDB#automated-cherry-picking-of-bug-fixes)"
    )

    # Add original PR description. Comment out the Github issue reference
    # keywords like 'Fixes #1234', to avoid having multiple PRs saying they fix
    # a given issue. The backport PR is going to reference the fixed issue as
    # "Original issue #xxxx". The body of a PR without a description is None,
    # so substitute an empty string in this case.
    original_description = re.sub(
        r"((fix|clos|resolv)[esd]+)(\s+#[0-9]+)",
        r"`\1`\3",
        original_pr.body or "",
        flags=re.IGNORECASE,
    )
    backport_description += (
        "\n"
        "\n"
        "## Original description"
        "\n"
        f"### {original_pr.title}"
        "\n"
        f"{original_description}"
    )

    # Create the backport PR.
    backport_pr = target_repo.create_pull(
        title=f"Backport to {backport_target}: #{original_pr.number}: {original_pr.title}",
        body=backport_description,
        head=backport_branch,
        base=backport_target,
    )
    backport_pr.add_to_labels("is-auto-backport")
    backport_pr.add_to_assignees(original_pr.user.login)
    if not stopper_files:
        set_auto_merge(backport_pr.number)

    print(
        f"Created backport PR #{backport_pr.number} for #{original_pr.number}: {original_pr.title}"
    )