From 552950d221b332c6e0cbc2682e39022f2b972f32 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Fri, 20 Jan 2023 16:52:00 +0400
Subject: [PATCH] Save SQLSmith results to the CI DB

We are mostly interested in the stack traces and the failed queries here.
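
For example, once a run is uploaded, the most recent saved logs can be
listed with a query along these lines (job_date and test_name are the
columns from the unique index on the "log" table; the 'stacktrace' name
is an assumption based on the file names in the upload loop of
upload_ci_stats.sh):

    psql "$CI_STATS_DB" -c "
        select job_date, test_name
        from log
        where test_name = 'stacktrace'
        order by job_date desc
        limit 10;"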
---
 .github/workflows/sqlsmith.yaml | 47 +++++++++++++++--
 scripts/upload_ci_stats.sh      | 89 +++++++++++----------------------
 2 files changed, 71 insertions(+), 65 deletions(-)
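
A couple of notes on the workflow changes, for reviewers:

* The per-iteration seed is three random bytes converted from hex, so it
  always fits into a positive 32-bit integer. A minimal sketch of what
  the arithmetic expansion does:

    hex=$(openssl rand -hex 3)   # six hex digits, e.g. "a1b2c3"
    echo $((16#$hex))            # base-16 conversion: 0..16777215

* The quoting in the Stack trace step looks intimidating, but after the
  shell strips its own quotes, gdb receives plain commands like:

    printf "query = '%s'\n\n", debug_query_string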

diff --git a/.github/workflows/sqlsmith.yaml b/.github/workflows/sqlsmith.yaml
index 479767832..9aace593c 100644
--- a/.github/workflows/sqlsmith.yaml
+++ b/.github/workflows/sqlsmith.yaml
@@ -9,6 +9,8 @@ on:
       - sqlsmith
 jobs:
   sqlsmith:
+    # Change the JOB_NAME env variable below when changing this name.
+    # The name can't reference that variable directly because the env context is not accessible here.
     name: SQLsmith PG${{ matrix.pg }}
     runs-on: ${{ matrix.os }}
     strategy:
@@ -20,6 +22,7 @@ jobs:
     env:
       PG_SRC_DIR: pgbuild
       PG_INSTALL_DIR: postgresql
+      JOB_NAME: SQLsmith PG${{ matrix.pg }}
 
     steps:
     - name: Install Linux Dependencies
@@ -70,8 +73,14 @@ jobs:
     # 10 times 10000 queries seems to take roughly 40 minutes in CI
     - name: Run SQLsmith
       run: |
+        set -o pipefail
         cd sqlsmith
-        for i in `seq 1 10`; do ./sqlsmith --seed=$((16#$(openssl rand -hex 3))) --exclude-catalog --target="host=/tmp dbname=smith" --max-queries=10000; done
+        for _ in {1..10}
+        do
+            ./sqlsmith --seed=$((16#$(openssl rand -hex 3))) --exclude-catalog \
+                --target="host=/tmp dbname=smith" --max-queries=10000 \
+            2>&1 | tee sqlsmith.log # Overwritten on each iteration, so only the last log survives.
+        done
 
     - name: Check for coredumps
       if: always()
@@ -87,10 +96,19 @@ jobs:
     - name: Stack trace
       if: always() && steps.collectlogs.outputs.coredumps == 'true'
       run: |
-        sudo coredumpctl gdb <<EOT
-          printf "%s\n\n", debug_query_string
+        sudo coredumpctl gdb <<<"
+          set verbose on
+          set trace-commands on
+          show debug-file-directory
+          printf "'"'"query = '%s'\n\n"'"'", debug_query_string
+          frame function ExceptionalCondition
+          printf "'"'"condition = '%s'\n"'"'", conditionName
+          up 1
+          l
+          info args
+          info locals
           bt full
-        EOT
+        " 2>&1 | tee stacktrace.log
         ./scripts/bundle_coredumps.sh
         false
 
@@ -100,3 +118,24 @@ jobs:
       with:
         name: Coredumps sqlsmith ${{ matrix.os }} PG${{ matrix.pg }}
         path: coredumps
+
+    - name: Upload test results to the database
+      if: always()
+      env:
+        CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
+        GITHUB_EVENT_NAME: ${{ github.event_name }}
+        GITHUB_REF_NAME: ${{ github.ref_name }}
+        GITHUB_REPOSITORY: ${{ github.repository }}
+        GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
+        GITHUB_RUN_ID: ${{ github.run_id }}
+        GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        JOB_STATUS: ${{ job.status }}
+      run: |
+        if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]
+        then
+            GITHUB_PR_NUMBER="${{ github.event.number }}"
+        else
+            GITHUB_PR_NUMBER=0
+        fi
+        export GITHUB_PR_NUMBER
+        scripts/upload_ci_stats.sh
diff --git a/scripts/upload_ci_stats.sh b/scripts/upload_ci_stats.sh
index 9a85f8739..0be1c72c1 100755
--- a/scripts/upload_ci_stats.sh
+++ b/scripts/upload_ci_stats.sh
@@ -1,13 +1,6 @@
 #!/usr/bin/env bash
 set -xue
 
-if ! [ -e 'installcheck.log' ]
-then
-    # Probably the previous steps have failed and we have nothing to upload.
-    echo "installcheck.log does not exist"
-    exit 0
-fi
-
 if [ -z "${CI_STATS_DB:-}" ]
 then
     # The secret with the stats db connection string is not accessible in forks.
@@ -17,8 +10,10 @@ fi
 
 PSQL=(psql "${CI_STATS_DB}" -qtAX "--set=ON_ERROR_STOP=1")
 
-# The tables we are going to use.
-DESIRED_SCHEMA="
+# The tables we are going to use. This schema is here just as a reminder; you
+# will have to create the tables manually. After you change the actual DB
+# schema, don't forget to append the needed migration code below.
+: "
 create extension if not exists timescaledb;
 
 create table job(
@@ -60,41 +55,8 @@ create table log(
 create unique index on log(job_date, test_name);
 
 select create_hypertable('log', 'job_date');
-
--- don't add a trailing newline because bash command substitution removes it"
-
-DROP_QUERY="
-drop table if exists test cascade;
-drop table if exists job cascade;
-drop table if exists log cascade;
 "
 
-# Recreate the tables if the schema changed.
-EXISTING_SCHEMA=$("${PSQL[@]}" -c "
-    create table if not exists _schema(create_query text, drop_query text);
-    select create_query from _schema;
-")
-
-if ! [ "${EXISTING_SCHEMA}" == "${DESIRED_SCHEMA}" ];
-then
-    "${PSQL[@]}" -v new_create="$DESIRED_SCHEMA" -v new_drop="$DROP_QUERY" <<<"
--- Run both the old and the new drop queries and ignore errors, to try to
--- bring the database into a predictable state even if it's current state is
--- incorrect (e.g. _schema doesn't actually match the existing tables).
-\set ON_ERROR_STOP 0
-select drop_query from _schema \gexec
-:new_drop
-\set ON_ERROR_STOP 1
-
--- Create new tables.
-begin;
-:new_create
-truncate table _schema;
-insert into _schema values (:'new_create', :'new_drop');
-commit;
-"
-fi
-
 # Create the job record.
 COMMIT_SHA=$(git -C "$(dirname "${BASH_SOURCE[0]}")" rev-parse @)
 export COMMIT_SHA
@@ -113,20 +75,35 @@ returning job_date;
 ")
 export JOB_DATE
 
-# Split the regression.diffs into per-test files.
-gawk '
-    match($0, /^(diff|\+\+\+|\-\-\-) .*\/(.*)[.]out/, a) {
-        file = a[2] ".diff";
-        next;
+# Parse installcheck.log to find the individual test results. Note that this
+# file might not exist for failed checks or for non-regression checks like
+# SQLSmith, in which case we still want to save the other logs.
+if [ -f 'installcheck.log' ]
+then
+    gawk -v OFS='\t' '
+    match($0, /^(test|    ) ([^ ]+)[ ]+\.\.\.[ ]+([^ ]+) (|\(.*\))[ ]+([0-9]+) ms$/, a) {
+        print ENVIRON["JOB_DATE"], a[2], tolower(a[3] (a[4] ? (" " a[4]) : "")), a[5];
     }
+    ' installcheck.log > tests.tsv
 
-    { if (file) print $0 > file; }
-' regression.log
+    # Save the test results into the database.
+    "${PSQL[@]}" -c "\copy test from tests.tsv"
+
+    # Split the regression.diffs into per-test files.
+    gawk '
+        match($0, /^(diff|\+\+\+|\-\-\-) .*\/(.*)[.]out/, a) {
+            file = a[2] ".diff";
+            next;
+        }
+
+        { if (file) print $0 > file; }
+    ' regression.log
+fi
 
 # Snip the long sequences of "+" or "-" changes in the diffs.
 for x in *.diff;
 do
-    if ! [ -e "$x" ] ; then continue ; fi
+    if ! [ -f "$x" ] ; then continue ; fi
     gawk -v max_context_lines=10 -v min_context_lines=2 '
         /^-/ { new_sign = "-" }
         /^+/ { new_sign = "+" }
@@ -171,18 +148,8 @@ do
     mv "$x.tmp" "$x"
 done
 
-# Parse the installcheck.log to find the individual test results.
-gawk -v OFS='\t' '
-match($0, /^(test|    ) ([^ ]+)[ ]+\.\.\.[ ]+([^ ]+) (|\(.*\))[ ]+([0-9]+) ms$/, a) {
-    print ENVIRON["JOB_DATE"], a[2], tolower(a[3] (a[4] ? (" " a[4]) : "")), a[5];
-}
-' installcheck.log > tests.tsv
-
-# Save the test results into the database.
-"${PSQL[@]}" -c "\copy test from tests.tsv"
-
 # Upload the logs.
-for x in sanitizer/* {sanitizer,stacktrace,postgres-failure}.log *.diff
+for x in sanitizer/* {sqlsmith/sqlsmith,sanitizer,stacktrace,postgres-failure}.log *.diff
 do
     if ! [ -e "$x" ]; then continue ; fi
     "${PSQL[@]}" <<<"