Upload test results into a database

This will help us find flaky tests and rare failures.
Alexander Kuzmenkov 2022-11-14 16:59:59 +04:00 committed by Alexander Kuzmenkov
parent 0360812e3c
commit 0d30155b26
16 changed files with 340 additions and 54 deletions
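As a sketch of the kind of query this enables, run against the test table created by scripts/upload_ci_stats.sh below (the query and its 7-day window are illustrative, not part of this commit):

    psql "$CI_STATS_DB" <<<"
    select test_name,
           count(*) filter (where test_status <> 'ok') as failures,
           count(*) as total_runs
    from test
    where job_date > now() - interval '7 days'
    group by test_name
    having count(distinct test_status) > 1
    order by failures desc;
    "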

@@ -28,6 +28,7 @@ jobs:
fi
regress:
# Change the JOB_NAME variable below when changing the name.
name: PG${{ matrix.pg }}${{ matrix.snapshot }} ${{ matrix.name }} ${{ matrix.os }}
needs: matrixbuilder
runs-on: ${{ matrix.os }}
@@ -59,7 +60,7 @@ jobs:
# This is needed because GitHub image macos-10.15 version
# 20210927.1 did not install OpenSSL, so we install openssl
# explicitly.
brew install openssl
brew install openssl gawk
sudo perl -MCPAN -e "CPAN::Shell->notest('install', 'IPC::Run')"
sudo perl -MCPAN -e "CPAN::Shell->notest('install', 'Test::Most')"
@@ -178,9 +179,11 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: Regression diff ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
path: regression.log
path: |
regression.log
installcheck.log
- name: Save postmaster.log
- name: Save PostgreSQL log
if: always()
uses: actions/upload-artifact@v3
with:
@@ -190,14 +193,16 @@ jobs:
- name: Stack trace
if: always() && steps.collectlogs.outputs.coredumps == 'true'
run: |
sudo coredumpctl gdb <<EOT
sudo coredumpctl gdb <<<"
set verbose on
show debug-file-directory
printf "%s\n\n", debug_query_string
printf "query = '%s'\n\n", debug_query_string
frame function ExceptionalCondition
printf "condition = '%s'\n", conditionName
bt full
EOT
" | tee stacktrace.log
./scripts/bundle_coredumps.sh
false
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
- name: Coredumps
if: always() && steps.collectlogs.outputs.coredumps == 'true'
@@ -215,3 +220,29 @@ jobs:
build/test/tmp_check/log
build/tsl/test/tmp_check/log
- name: Upload test results to the database
# Don't upload the results of the flaky check, because the db schema only
# supports one run of each test per job.
if: always() && (! contains(matrix.name, 'Flaky'))
env:
# GitHub Actions allows you neither to use the env context for the job name
# nor to access the job name from the step context, so we have to
# duplicate it to work around this nonsense.
JOB_NAME: PG${{ matrix.pg }}${{ matrix.snapshot }} ${{ matrix.name }} ${{ matrix.os }}
CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
JOB_STATUS: ${{ job.status }}
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]] ;
then
GITHUB_PR_NUMBER="${{ github.event.number }}"
else
GITHUB_PR_NUMBER=0
fi
export GITHUB_PR_NUMBER
scripts/upload_ci_stats.sh

@@ -48,6 +48,8 @@ jobs:
run: python .github/gh_config_reader.py
sanitizer:
# Change the JOB_NAME variable below when changing the name.
# Don't use the env variable here because the env context is not accessible.
name: PG${{ matrix.pg }} Sanitizer ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: config
@@ -144,9 +146,11 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: Regression diff ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
path: regression.log
path: |
regression.log
installcheck.log
- name: Save postmaster.log
- name: Save PostgreSQL log
if: always()
uses: actions/upload-artifact@v3
with:
@@ -156,12 +160,16 @@ jobs:
- name: Stack trace
if: always() && steps.collectlogs.outputs.coredumps == 'true'
run: |
sudo coredumpctl gdb <<EOT
printf "%s\n\n", debug_query_string
sudo coredumpctl gdb <<<"
set verbose on
show debug-file-directory
printf "query = '%s'\n\n", debug_query_string
frame function ExceptionalCondition
printf "condition = '%s'\n", conditionName
bt full
EOT
" | tee stacktrace.log
./scripts/bundle_coredumps.sh
false
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
- name: Coredumps
if: always() && steps.collectlogs.outputs.coredumps == 'true'
@@ -175,4 +183,29 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: sanitizer logs ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
path: ${{ github.workspace }}/sanitizer.log.*
path: ${{ github.workspace }}/sanitizer.log
- name: Upload test results to the database
if: always()
env:
# GitHub Actions allows you neither to use the env context for the job name
# nor to access the job name from the step context, so we have to
# duplicate it to work around this nonsense.
JOB_NAME: PG${{ matrix.pg }} ${{ env.name }} ${{ matrix.os }}
CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
JOB_STATUS: ${{ job.status }}
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]] ;
then
GITHUB_PR_NUMBER="${{ github.event.number }}"
else
GITHUB_PR_NUMBER=0
fi
export GITHUB_PR_NUMBER
scripts/upload_ci_stats.sh

@@ -33,6 +33,7 @@ jobs:
fi
build:
# Change the JOB_NAME variable below when changing the name.
name: PG${{ matrix.pg }} ${{ matrix.build_type }} ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: config
@@ -61,6 +62,21 @@ jobs:
TABLESPACE1: D:\tablespace1\
TABLESPACE2: D:\tablespace2\
steps:
- name: Setup WSL
if: matrix.pg != '12'
uses: Vampire/setup-wsl@v1
with:
additional-packages:
cmake
gawk
gcc
git
gnupg
make
postgresql-client
postgresql-common
tree
- name: Configure git
# Since we want to reuse the checkout in the WSL environment
# we have to prevent git from changing the line ending in the
@@ -68,6 +84,7 @@ jobs:
run: |
git config --global core.autocrlf false
git config --global core.eol lf
- name: Checkout TimescaleDB source
uses: actions/checkout@v3
@@ -116,6 +133,7 @@ jobs:
icacls ${{ env.TABLESPACE2 }} /grant runneradmin:F /T
copy build_win/test/postgresql.conf ${{ env.PGDATA }}
copy build_win/test/pg_hba.conf ${{ env.PGDATA }}
icacls . /grant runneradmin:F /T
~/PostgreSQL/${{ matrix.pg }}/bin/pg_ctl start -o "${{ matrix.pg_config }}" --log=postgres.log
~/PostgreSQL/${{ matrix.pg }}/bin/pg_isready -U postgres -d postgres --timeout=30
~/PostgreSQL/${{ matrix.pg }}/bin/psql -U postgres -d postgres -c 'CREATE USER root SUPERUSER LOGIN;'
@@ -129,17 +147,6 @@ jobs:
echo "Data directory:"
~/PostgreSQL/${{ matrix.pg }}/bin/psql -U postgres -d postgres -c 'SHOW data_directory;'
- name: Setup WSL
if: matrix.pg != '12'
uses: Vampire/setup-wsl@v1
with:
additional-packages:
cmake
gcc
git
gnupg
make
postgresql-common
- name: Install postgres for test runner
if: matrix.pg != '12'
shell: wsl-bash {0}
@@ -158,10 +165,10 @@ jobs:
# isolationtester is only packaged with pg14+, so we would have to build our
# own postgres to get it for earlier versions; we skip it for < 14.
if [[ "${{ matrix.pg }}" == "14" ]]; then
make -C build_wsl isolationchecklocal
make -C build_wsl isolationchecklocal | tee -a installcheck.log
fi
make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}"
make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log
- name: Setup postgres cluster for TSL tests
if: matrix.pg != '12'
@@ -186,43 +193,63 @@ jobs:
# isolationtester is only packaged with pg14+, so we would have to build our
# own postgres to get it for earlier versions; we skip it for < 14.
if [[ "${{ matrix.pg }}" == "14" ]]; then
make -C build_wsl isolationchecklocal-t
make -C build_wsl isolationchecklocal-t | tee -a installcheck.log
fi
make -C build_wsl -k regresschecklocal-t IGNORES="${{ matrix.tsl_ignores }}" SKIPS="${{ matrix.tsl_skips }} ${{ matrix.tsl_skips_version }}"
make -C build_wsl -k regresschecklocal-t IGNORES="${{ matrix.tsl_ignores }}" SKIPS="${{ matrix.tsl_skips }} ${{ matrix.tsl_skips_version }}" | tee -a installcheck.log
- name: Show regression diffs
if: always() && matrix.pg != '12'
shell: python
id: collectlogs
if: always() && matrix.pg != '12'
env:
WSLENV: GITHUB_OUTPUT
shell: wsl-bash {0}
run: |
import re
import os
from pathlib import Path
for path in Path('build_wsl').rglob('regression.out'):
    for line in path.open():
        if re.search('failed', line, re.IGNORECASE):
            print(line, end='')
for path in Path('build_wsl').rglob('regression.diffs'):
    for line in path.open():
        print(line, end='')
    with open(os.environ['GITHUB_OUTPUT'], 'a') as output:
        print('regression_diff=true', file=output)
find . -name regression.diffs -exec cat {} + > regression.log
if [[ -s regression.log ]]; then echo "regression_diff=true" >>$GITHUB_OUTPUT; fi
grep -e 'FAILED' -e 'failed (ignored)' installcheck.log || true
cat regression.log
- name: Save regression diffs
if: always() && matrix.pg != '12' && steps.collectlogs.outputs.regression_diff == 'true'
uses: actions/upload-artifact@v3
with:
name: Regression diff ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
path: regression.log
path: |
regression.log
installcheck.log
- name: Save postgres log
- name: Save PostgreSQL log
if: always()
uses: actions/upload-artifact@v3
with:
name: Postgres log ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
name: PostgreSQL log ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
path: postgres.log
- name: Upload test results to the database
if: always() && matrix.pg != '12'
shell: wsl-bash {0}
env:
# Update when adding new variables.
WSLENV: "JOB_NAME:CI_STATS_DB:GITHUB_EVENT_NAME:GITHUB_REF_NAME:GITHUB_REPOSITORY:GITHUB_RUN_ATTEMPT:GITHUB_RUN_ID:GITHUB_RUN_NUMBER:JOB_STATUS"
# GitHub Actions allows you neither to use the env context for the job name
# nor to access the job name from the step context, so we have to
# duplicate it to work around this nonsense.
JOB_NAME: PG${{ matrix.pg }} ${{ matrix.build_type }} ${{ matrix.os }}
CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
JOB_STATUS: ${{ job.status }}
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]] ;
then
GITHUB_PR_NUMBER="${{ github.event.number }}"
else
GITHUB_PR_NUMBER=0
fi
export GITHUB_PR_NUMBER
scripts/upload_ci_stats.sh

scripts/upload_ci_stats.sh (new executable file, 192 lines)

@@ -0,0 +1,192 @@
#!/usr/bin/env bash
set -xue
if ! [ -e 'installcheck.log' ]
then
    # Probably the previous steps have failed and we have nothing to upload.
    echo "installcheck.log does not exist"
    exit 0
fi

if [ -z "${CI_STATS_DB:-}" ]
then
    # The secret with the stats db connection string is not accessible in forks.
    echo "The statistics database connection string is not specified"
    exit 0
fi
PSQL=(psql "${CI_STATS_DB}" -qtAX "--set=ON_ERROR_STOP=1")
# The tables we are going to use.
DESIRED_SCHEMA="
create extension if not exists timescaledb;
create table job(
    job_date timestamptz, -- Serves as a unique id.
    commit_sha text,
    job_name text,
    repository text,
    ref_name text,
    event_name text,
    pr_number int,
    job_status text,
    url text,
    run_attempt int,
    run_id bigint,
    run_number int
);
create unique index on job(job_date);
select create_hypertable('job', 'job_date');
create table test(
    job_date timestamptz,
    test_name text,
    test_status text,
    test_duration float
);
create unique index on test(job_date, test_name);
select create_hypertable('test', 'job_date');
create table log(
    job_date timestamptz,
    test_name text,
    log_contents text
);
create unique index on log(job_date, test_name);
select create_hypertable('log', 'job_date');
-- don't add a trailing newline because bash command substitution removes it"
DROP_QUERY="
drop table if exists test cascade;
drop table if exists job cascade;
drop table if exists log cascade;
"
# Recreate the tables if the schema changed.
EXISTING_SCHEMA=$("${PSQL[@]}" -c "
create table if not exists _schema(create_query text, drop_query text);
select create_query from _schema;
")
if ! [ "${EXISTING_SCHEMA}" == "${DESIRED_SCHEMA}" ];
then
"${PSQL[@]}" -v new_create="$DESIRED_SCHEMA" -v new_drop="$DROP_QUERY" <<<"
-- Run both the old and the new drop queries and ignore errors, to try to
-- bring the database into a predictable state even if it's current state is
-- incorrect (e.g. _schema doesn't actually match the existing tables).
\set ON_ERROR_STOP 0
select drop_query from _schema \gexec
:new_drop
\set ON_ERROR_STOP 1
-- Create new tables.
begin;
:new_create
truncate table _schema;
insert into _schema values (:'new_create', :'new_drop');
commit;
"
fi
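# The "select drop_query from _schema \gexec" line above relies on psql's
# \gexec metacommand, which runs the query and then executes each value of
# its result set as a separate SQL statement. A self-contained sketch of the
# mechanism (hypothetical table and values, not part of this script):
#
#   psql "$CI_STATS_DB" <<<"
#   create table if not exists pending(q text);
#   insert into pending values ('select 1'), ('select 2');
#   select q from pending \gexec
#   "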
# Create the job record.
COMMIT_SHA=$(git -C "$(dirname "${BASH_SOURCE[0]}")" rev-parse @)
export COMMIT_SHA
JOB_NAME="${JOB_NAME:-test-job}"
export JOB_NAME
JOB_DATE=$("${PSQL[@]}" -c "
insert into job values (
    now(), '$COMMIT_SHA', '$JOB_NAME',
    '$GITHUB_REPOSITORY', '$GITHUB_REF_NAME', '$GITHUB_EVENT_NAME',
    '$GITHUB_PR_NUMBER', '$JOB_STATUS',
    'https://github.com/timescale/timescaledb/actions/runs/$GITHUB_RUN_ID/attempts/$GITHUB_RUN_ATTEMPT',
    '$GITHUB_RUN_ATTEMPT', '$GITHUB_RUN_ID', '$GITHUB_RUN_NUMBER')
returning job_date;
")
export JOB_DATE
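# psql was invoked with -qtAX above, so the command substitution captures just
# the returned timestamptz; JOB_DATE is the key that ties the test and log
# rows below to this job record.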
# Split the regression.diffs into per-test files.
gawk '
    # A header line like "diff .../<test>.out" (or the ---/+++ lines)
    # selects a new per-test output file named <test>.diff.
    match($0, /^(diff|\+\+\+|\-\-\-) .*\/(.*)[.]out/, a) {
        file = a[2] ".diff";
        next;
    }
    # Append everything else to the current file, once one is known.
    { if (file) print $0 > file; }
' regression.log
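# For example, if regression.log contains concatenated pg_regress diffs like
# this sketch (not verbatim CI output):
#
#   diff -u .../expected/copy.out .../results/copy.out
#   --- .../expected/copy.out
#   +++ .../results/copy.out
#   @@ -1,3 +1,3 @@
#    select 1;
#   -1
#   +2
#
# then each header line switches the output file to copy.diff (the headers
# themselves are skipped), and the hunk lines that follow are appended to it.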
# Snip the long sequences of "+" or "-" changes in the diffs.
for x in *.diff
do
    if ! [ -e "$x" ] ; then continue ; fi
    gawk -v max_context_lines=10 -v min_context_lines=2 '
        # Classify each line as removal, addition, or context.
        /^-/     { new_sign = "-" }
        /^[+]/   { new_sign = "+" }
        /^[^+-]/ { new_sign = " " }
        {
            if (old_sign != new_sign) {
                # The run of same-sign lines ended: print the buffered tail,
                # replacing the dropped middle with a "lines skipped" marker.
                to_print = lines_buffered > max_context_lines ? min_context_lines : lines_buffered;
                if (lines_buffered > to_print)
                    print "<" lines_buffered - to_print " lines skipped>";
                for (i = 0; i < to_print; i++) {
                    print buf[(NR + i - to_print) % max_context_lines]
                }
                printf("c %04d: %s\n", NR, $0);
                old_sign = new_sign;
                lines_printed = 0;
                lines_buffered = 0;
            } else {
                if (lines_printed >= min_context_lines) {
                    # Past the leading context of this run: only buffer.
                    lines_buffered++;
                    buf[NR % max_context_lines] = sprintf("b %04d: %s", NR, $0)
                } else {
                    lines_printed++;
                    printf("p %04d: %s\n", NR, $0);
                }
            }
        }
        END {
            # Flush the buffer for the final run of lines.
            to_print = lines_buffered > max_context_lines ? min_context_lines : lines_buffered;
            if (lines_buffered > to_print)
                print "<" lines_buffered - to_print " lines skipped>";
            for (i = 0; i < to_print; i++) {
                print buf[(NR + 1 + i - to_print) % max_context_lines]
            }
        }' "$x" > "$x.tmp"
    mv "$x.tmp" "$x"
done
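# A sketch of the effect, with min_context_lines=2 and max_context_lines=10:
# a run of 100 consecutive '+' lines followed by one context line comes out
# roughly as
#
#   c 0001: +first added line      (the sign change that starts the run)
#   p 0002: +second added line
#   p 0003: +third added line
#   <95 lines skipped>
#   b 0099: +second-to-last added line
#   b 0100: +last added line
#   c 0101:  context line that ends the run
#
# so only a few lines at each edge of a long run survive.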
# Parse the installcheck.log to find the individual test results.
gawk -v OFS='\t' '
    match($0, /^(test| ) ([^ ]+)[ ]+\.\.\.[ ]+([^ ]+) (|\(.*\))[ ]+([0-9]+) ms$/, a) {
        print ENVIRON["JOB_DATE"], a[2], tolower(a[3] (a[4] ? (" " a[4]) : "")), a[5];
    }
' installcheck.log > tests.tsv
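# For example, a pg_regress result line such as this sketch
#
#   test copy                     ... ok          123 ms
#
# becomes the tab-separated row "<JOB_DATE>  copy  ok  123", and a line
# ending in "FAILED (ignored)   52 ms" is stored with the status
# "failed (ignored)".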
# Save the test results into the database.
"${PSQL[@]}" -c "\copy test from tests.tsv"
# Upload the logs.
for x in {sanitizer,stacktrace,postgres-failure}.log *.diff
do
    if ! [ -e "$x" ]; then continue ; fi
    "${PSQL[@]}" <<<"
\set contents \`cat $x\`
insert into log values ('$JOB_DATE', '$(basename "$x" .diff)', :'contents');
"
done
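# The script can also be exercised outside CI; a hypothetical local run,
# assuming a reachable stats database and an installcheck.log in the
# current directory:
#
#   export CI_STATS_DB='postgres://user:password@host/db'  # assumed format
#   export GITHUB_REPOSITORY=timescale/timescaledb GITHUB_REF_NAME=main \
#       GITHUB_EVENT_NAME=push GITHUB_PR_NUMBER=0 GITHUB_RUN_ATTEMPT=1 \
#       GITHUB_RUN_ID=0 GITHUB_RUN_NUMBER=0 JOB_STATUS=success
#   ./scripts/upload_ci_stats.sh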

@@ -8,8 +8,8 @@ set(TEST_TEMPLATES_MODULE_DEBUG
remote_create_chunk.spec.in
dist_restore_point.spec.in
dist_cmd_exec.spec.in
cagg_drop_chunks.spec.in
telemetry.spec.in
cagg_drop_chunks_iso.spec.in
telemetry_iso.spec.in
compression_chunk_race.spec.in
compression_merge_race.spec.in
decompression_chunk_and_parallel_query.in
@@ -31,9 +31,9 @@ endif()
list(
APPEND
TEST_FILES
compression_ddl.spec
compression_ddl_iso.spec
cagg_insert.spec
cagg_multi.spec
cagg_multi_iso.spec
cagg_concurrent_refresh.spec
cagg_concurrent_refresh_dist_ht.spec
deadlock_drop_chunks_compress.spec)

@@ -20,6 +20,9 @@ setup {
teardown {
DROP TABLE compress;
DROP TABLE compressed_chunks;
DROP TABLE decompressed_chunks;
DROP TABLE telemetry;
}
session "s1"

@@ -93,7 +93,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
remote_txn.sql
transparent_decompression_queries.sql
tsl_tables.sql
license.sql)
license_tsl.sql)
if(USE_TELEMETRY)
list(APPEND TEST_FILES telemetry_stats.sql)
endif()