From 552950d221b332c6e0cbc2682e39022f2b972f32 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 20 Jan 2023 16:52:00 +0400 Subject: [PATCH] Save SQLSmith results to the CI DB Mostly we are interested in stacktraces and failed queries here. --- .github/workflows/sqlsmith.yaml | 47 +++++++++++++++-- scripts/upload_ci_stats.sh | 89 +++++++++++---------------------- 2 files changed, 71 insertions(+), 65 deletions(-) diff --git a/.github/workflows/sqlsmith.yaml b/.github/workflows/sqlsmith.yaml index 479767832..9aace593c 100644 --- a/.github/workflows/sqlsmith.yaml +++ b/.github/workflows/sqlsmith.yaml @@ -9,6 +9,8 @@ on: - sqlsmith jobs: sqlsmith: + # Change the JOB_NAME variable below when changing the name. + # Don't use the env variable here because the env context is not accessible. name: SQLsmith PG${{ matrix.pg }} runs-on: ${{ matrix.os }} strategy: @@ -20,6 +22,7 @@ jobs: env: PG_SRC_DIR: pgbuild PG_INSTALL_DIR: postgresql + JOB_NAME: SQLsmith PG${{ matrix.pg }} steps: - name: Install Linux Dependencies @@ -70,8 +73,14 @@ jobs: # 10 times 10000 queries seems to take roughly 40 minutes in CI - name: Run SQLsmith run: | + set -o pipefail cd sqlsmith - for i in `seq 1 10`; do ./sqlsmith --seed=$((16#$(openssl rand -hex 3))) --exclude-catalog --target="host=/tmp dbname=smith" --max-queries=10000; done + for _ in {1..10} + do + ./sqlsmith --seed=$((16#$(openssl rand -hex 3))) --exclude-catalog \ + --target="host=/tmp dbname=smith" --max-queries=10000 \ + 2>&1 | tee sqlsmith.log # We only keep the last log. + done - name: Check for coredumps if: always() @@ -87,10 +96,19 @@ jobs: - name: Stack trace if: always() && steps.collectlogs.outputs.coredumps == 'true' run: | - sudo coredumpctl gdb <<EOT - printf "%s\n\n", debug_query_string + sudo coredumpctl gdb <<<" + set verbose on + set trace-commands on + show debug-file-directory + printf "'"'"query = '%s'\n\n"'"'", debug_query_string + frame function ExceptionalCondition + printf "'"'"condition = '%s'\n"'"'", conditionName + up 1 + l + info args + info locals bt full - EOT + " 2>&1 | tee stacktrace.log ./scripts/bundle_coredumps.sh false @@ -100,3 +118,24 @@ jobs: with: name: Coredumps sqlsmith ${{ matrix.os }} PG${{ matrix.pg }} path: coredumps + + - name: Upload test results to the database + if: always() + env: + CI_STATS_DB: ${{ secrets.CI_STATS_DB }} + GITHUB_EVENT_NAME: ${{ github.event_name }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_RUN_NUMBER: ${{ github.run_number }} + JOB_STATUS: ${{ job.status }} + run: | + if [[ "${{ github.event_name }}" == "pull_request" ]] ; + then + GITHUB_PR_NUMBER="${{ github.event.number }}" + else + GITHUB_PR_NUMBER=0 + fi + export GITHUB_PR_NUMBER + scripts/upload_ci_stats.sh diff --git a/scripts/upload_ci_stats.sh b/scripts/upload_ci_stats.sh index 9a85f8739..0be1c72c1 100755 --- a/scripts/upload_ci_stats.sh +++ b/scripts/upload_ci_stats.sh @@ -1,13 +1,6 @@ #!/usr/bin/env bash set -xue -if ! [ -e 'installcheck.log' ] -then - # Probably the previous steps have failed and we have nothing to upload. - echo "installcheck.log does not exist" - exit 0 -fi - if [ -z "${CI_STATS_DB:-}" ] then # The secret with the stats db connection string is not accessible in forks. @@ -17,8 +10,10 @@ fi PSQL=(psql "${CI_STATS_DB}" -qtAX "--set=ON_ERROR_STOP=1") -# The tables we are going to use. -DESIRED_SCHEMA=" +# The tables we are going to use. This schema is here just as a reminder, you'll +# have to create them manually. After you manually change the actual DB schema, +# don't forget to append the needed migration code below. +: " create extension if not exists timescaledb; create table job( @@ -60,41 +55,8 @@ create table log( create unique index on log(job_date, test_name); select create_hypertable('log', 'job_date'); - --- don't add a trailing newline because bash command substitution removes it" - -DROP_QUERY=" -drop table if exists test cascade; -drop table if exists job cascade; -drop table if exists log cascade; " -# Recreate the tables if the schema changed. -EXISTING_SCHEMA=$("${PSQL[@]}" -c " - create table if not exists _schema(create_query text, drop_query text); - select create_query from _schema; -") - -if ! [ "${EXISTING_SCHEMA}" == "${DESIRED_SCHEMA}" ]; -then - "${PSQL[@]}" -v new_create="$DESIRED_SCHEMA" -v new_drop="$DROP_QUERY" <<<" --- Run both the old and the new drop queries and ignore errors, to try to --- bring the database into a predictable state even if it's current state is --- incorrect (e.g. _schema doesn't actually match the existing tables). -\set ON_ERROR_STOP 0 -select drop_query from _schema \gexec -:new_drop -\set ON_ERROR_STOP 1 - --- Create new tables. -begin; -:new_create -truncate table _schema; -insert into _schema values (:'new_create', :'new_drop'); -commit; -" -fi - # Create the job record. COMMIT_SHA=$(git -C "$(dirname "${BASH_SOURCE[0]}")" rev-parse @) export COMMIT_SHA @@ -113,20 +75,35 @@ returning job_date; ") export JOB_DATE -# Split the regression.diffs into per-test files. -gawk ' - match($0, /^(diff|\+\+\+|\-\-\-) .*\/(.*)[.]out/, a) { - file = a[2] ".diff"; - next; +# Parse the installcheck.log to find the individual test results. Note that this +# file might not exist for failed checks or non-regression checks like SQLSmith. +# We still want to save the other logs. +if [ -f 'installcheck.log' ] +then + gawk -v OFS='\t' ' + match($0, /^(test| ) ([^ ]+)[ ]+\.\.\.[ ]+([^ ]+) (|\(.*\))[ ]+([0-9]+) ms$/, a) { + print ENVIRON["JOB_DATE"], a[2], tolower(a[3] (a[4] ? (" " a[4]) : "")), a[5]; } + ' installcheck.log > tests.tsv - { if (file) print $0 > file; } -' regression.log + # Save the test results into the database. + "${PSQL[@]}" -c "\copy test from tests.tsv" + + # Split the regression.diffs into per-test files. + gawk ' + match($0, /^(diff|\+\+\+|\-\-\-) .*\/(.*)[.]out/, a) { + file = a[2] ".diff"; + next; + } + + { if (file) print $0 > file; } + ' regression.log +fi # Snip the long sequences of "+" or "-" changes in the diffs. for x in *.diff; do - if ! [ -e "$x" ] ; then continue ; fi + if ! [ -f "$x" ] ; then continue ; fi gawk -v max_context_lines=10 -v min_context_lines=2 ' /^-/ { new_sign = "-" } /^+/ { new_sign = "+" } @@ -171,18 +148,8 @@ do mv "$x.tmp" "$x" done -# Parse the installcheck.log to find the individual test results. -gawk -v OFS='\t' ' -match($0, /^(test| ) ([^ ]+)[ ]+\.\.\.[ ]+([^ ]+) (|\(.*\))[ ]+([0-9]+) ms$/, a) { - print ENVIRON["JOB_DATE"], a[2], tolower(a[3] (a[4] ? (" " a[4]) : "")), a[5]; -} -' installcheck.log > tests.tsv - -# Save the test results into the database. -"${PSQL[@]}" -c "\copy test from tests.tsv" - # Upload the logs. -for x in sanitizer/* {sanitizer,stacktrace,postgres-failure}.log *.diff +for x in sanitizer/* {sqlsmith/sqlsmith,sanitizer,stacktrace,postgres-failure}.log *.diff do if ! [ -e "$x" ]; then continue ; fi "${PSQL[@]}" <<<"