Upload test results into a database

This will help us find flaky tests and rare failures.
Alexander Kuzmenkov 2022-11-14 16:59:59 +04:00 committed by Alexander Kuzmenkov
parent 0360812e3c
commit 0d30155b26
16 changed files with 340 additions and 54 deletions
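As a sketch of the kind of query this enables, run against the test table created by scripts/upload_ci_stats.sh below (the query and its 7-day window are illustrative, not part of this commit):

    psql "$CI_STATS_DB" <<<"
    select test_name,
           count(*) filter (where test_status <> 'ok') as failures,
           count(*) as total_runs
    from test
    where job_date > now() - interval '7 days'
    group by test_name
    having count(distinct test_status) > 1
    order by failures desc;
    "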

@@ -28,6 +28,7 @@ jobs:
fi
regress:
# Change the JOB_NAME variable below when changing the name.
name: PG${{ matrix.pg }}${{ matrix.snapshot }} ${{ matrix.name }} ${{ matrix.os }}
needs: matrixbuilder
runs-on: ${{ matrix.os }}
@@ -59,7 +60,7 @@ jobs:
# This is needed because GitHub image macos-10.15 version
# 20210927.1 did not install OpenSSL, so we install openssl
# explicitly.
brew install openssl
brew install openssl gawk
sudo perl -MCPAN -e "CPAN::Shell->notest('install', 'IPC::Run')"
sudo perl -MCPAN -e "CPAN::Shell->notest('install', 'Test::Most')"
@@ -178,9 +179,11 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: Regression diff ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
path: regression.log
path: |
regression.log
installcheck.log
- name: Save postmaster.log
- name: Save PostgreSQL log
if: always()
uses: actions/upload-artifact@v3
with:
@@ -190,14 +193,16 @@ jobs:
- name: Stack trace
if: always() && steps.collectlogs.outputs.coredumps == 'true'
run: |
sudo coredumpctl gdb <<EOT
sudo coredumpctl gdb <<<"
set verbose on
show debug-file-directory
printf "%s\n\n", debug_query_string
printf "query = '%s'\n\n", debug_query_string
frame function ExceptionalCondition
printf "condition = '%s'\n", conditionName
bt full
EOT
" | tee stacktrace.log
./scripts/bundle_coredumps.sh
false
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
- name: Coredumps
if: always() && steps.collectlogs.outputs.coredumps == 'true'
@@ -215,3 +220,29 @@ jobs:
build/test/tmp_check/log
build/tsl/test/tmp_check/log
- name: Upload test results to the database
# Don't upload the results of the flaky check, because the db schema only
# supports one run of each test per job.
if: always() && (! contains(matrix.name, 'Flaky'))
env:
# GitHub Actions allows you neither to use the env context for the job name
# nor to access the job name from the step context, so we have to
# duplicate it to work around this nonsense.
JOB_NAME: PG${{ matrix.pg }}${{ matrix.snapshot }} ${{ matrix.name }} ${{ matrix.os }}
CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
JOB_STATUS: ${{ job.status }}
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]] ;
then
GITHUB_PR_NUMBER="${{ github.event.number }}"
else
GITHUB_PR_NUMBER=0
fi
export GITHUB_PR_NUMBER
scripts/upload_ci_stats.sh

@@ -48,6 +48,8 @@ jobs:
run: python .github/gh_config_reader.py
sanitizer:
# Change the JOB_NAME variable below when changing the name.
# Don't use the env variable here because the env context is not accessible.
name: PG${{ matrix.pg }} Sanitizer ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: config
@@ -144,9 +146,11 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: Regression diff ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
path: regression.log
path: |
regression.log
installcheck.log
- name: Save postmaster.log
- name: Save PostgreSQL log
if: always()
uses: actions/upload-artifact@v3
with:
@@ -156,12 +160,16 @@ jobs:
- name: Stack trace
if: always() && steps.collectlogs.outputs.coredumps == 'true'
run: |
sudo coredumpctl gdb <<EOT
printf "%s\n\n", debug_query_string
sudo coredumpctl gdb <<<"
set verbose on
show debug-file-directory
printf "query = '%s'\n\n", debug_query_string
frame function ExceptionalCondition
printf "condition = '%s'\n", conditionName
bt full
EOT
" | tee stacktrace.log
./scripts/bundle_coredumps.sh
false
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
- name: Coredumps
if: always() && steps.collectlogs.outputs.coredumps == 'true'
@@ -175,4 +183,29 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: sanitizer logs ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
path: ${{ github.workspace }}/sanitizer.log.*
path: ${{ github.workspace }}/sanitizer.log
- name: Upload test results to the database
if: always()
env:
# GitHub Actions allows you neither to use the env context for the job name
# nor to access the job name from the step context, so we have to
# duplicate it to work around this nonsense.
JOB_NAME: PG${{ matrix.pg }} ${{ env.name }} ${{ matrix.os }}
CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
JOB_STATUS: ${{ job.status }}
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]] ;
then
GITHUB_PR_NUMBER="${{ github.event.number }}"
else
GITHUB_PR_NUMBER=0
fi
export GITHUB_PR_NUMBER
scripts/upload_ci_stats.sh

@@ -33,6 +33,7 @@ jobs:
fi
build:
# Change the JOB_NAME variable below when changing the name.
name: PG${{ matrix.pg }} ${{ matrix.build_type }} ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: config
@@ -61,6 +62,21 @@ jobs:
TABLESPACE1: D:\tablespace1\
TABLESPACE2: D:\tablespace2\
steps:
- name: Setup WSL
if: matrix.pg != '12'
uses: Vampire/setup-wsl@v1
with:
additional-packages:
cmake
gawk
gcc
git
gnupg
make
postgresql-client
postgresql-common
tree
- name: Configure git
# Since we want to reuse the checkout in the WSL environment
# we have to prevent git from changing the line ending in the
@@ -68,6 +84,7 @@ jobs:
run: |
git config --global core.autocrlf false
git config --global core.eol lf
- name: Checkout TimescaleDB source
uses: actions/checkout@v3
@@ -116,6 +133,7 @@ jobs:
icacls ${{ env.TABLESPACE2 }} /grant runneradmin:F /T
copy build_win/test/postgresql.conf ${{ env.PGDATA }}
copy build_win/test/pg_hba.conf ${{ env.PGDATA }}
icacls . /grant runneradmin:F /T
~/PostgreSQL/${{ matrix.pg }}/bin/pg_ctl start -o "${{ matrix.pg_config }}" --log=postgres.log
~/PostgreSQL/${{ matrix.pg }}/bin/pg_isready -U postgres -d postgres --timeout=30
~/PostgreSQL/${{ matrix.pg }}/bin/psql -U postgres -d postgres -c 'CREATE USER root SUPERUSER LOGIN;'
@@ -129,17 +147,6 @@ jobs:
echo "Data directory:"
~/PostgreSQL/${{ matrix.pg }}/bin/psql -U postgres -d postgres -c 'SHOW data_directory;'
- name: Setup WSL
if: matrix.pg != '12'
uses: Vampire/setup-wsl@v1
with:
additional-packages:
cmake
gcc
git
gnupg
make
postgresql-common
- name: Install postgres for test runner
if: matrix.pg != '12'
shell: wsl-bash {0}
@@ -158,10 +165,10 @@ jobs:
# isolationtester is only packaged with pg14+, so we would have to build our
# own postgres to get it for earlier versions; we skip it for < 14.
if [[ "${{ matrix.pg }}" == "14" ]]; then
make -C build_wsl isolationchecklocal
make -C build_wsl isolationchecklocal | tee -a installcheck.log
fi
make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}"
make -C build_wsl regresschecklocal IGNORES="${{ matrix.ignores }}" | tee -a installcheck.log
- name: Setup postgres cluster for TSL tests
if: matrix.pg != '12'
@@ -186,43 +193,63 @@ jobs:
# isolationtester is only packaged with pg14+, so we would have to build our
# own postgres to get it for earlier versions; we skip it for < 14.
if [[ "${{ matrix.pg }}" == "14" ]]; then
make -C build_wsl isolationchecklocal-t
make -C build_wsl isolationchecklocal-t | tee -a installcheck.log
fi
make -C build_wsl -k regresschecklocal-t IGNORES="${{ matrix.tsl_ignores }}" SKIPS="${{ matrix.tsl_skips }} ${{ matrix.tsl_skips_version }}"
make -C build_wsl -k regresschecklocal-t IGNORES="${{ matrix.tsl_ignores }}" SKIPS="${{ matrix.tsl_skips }} ${{ matrix.tsl_skips_version }}" | tee -a installcheck.log
- name: Show regression diffs
if: always() && matrix.pg != '12'
shell: python
id: collectlogs
if: always() && matrix.pg != '12'
env:
WSLENV: GITHUB_OUTPUT
shell: wsl-bash {0}
run: |
import re
import os
from pathlib import Path
for path in Path('build_wsl').rglob('regression.out'):
    for line in path.open():
        if re.search('failed', line, re.IGNORECASE):
            print(line, end='')
for path in Path('build_wsl').rglob('regression.diffs'):
    for line in path.open():
        print(line, end='')
    with open(os.environ['GITHUB_OUTPUT'], 'a') as output:
        print('regression_diff=true', file=output)
find . -name regression.diffs -exec cat {} + > regression.log
if [[ -s regression.log ]]; then echo "regression_diff=true" >>$GITHUB_OUTPUT; fi
grep -e 'FAILED' -e 'failed (ignored)' installcheck.log || true
cat regression.log
- name: Save regression diffs
if: always() && matrix.pg != '12' && steps.collectlogs.outputs.regression_diff == 'true'
uses: actions/upload-artifact@v3
with:
name: Regression diff ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
path: regression.log
path: |
regression.log
installcheck.log
- name: Save postgres log
- name: Save PostgreSQL log
if: always()
uses: actions/upload-artifact@v3
with:
name: Postgres log ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
name: PostgreSQL log ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
path: postgres.log
- name: Upload test results to the database
if: always() && matrix.pg != '12'
shell: wsl-bash {0}
env:
# Update when adding new variables.
WSLENV: "JOB_NAME:CI_STATS_DB:GITHUB_EVENT_NAME:GITHUB_REF_NAME:GITHUB_REPOSITORY:GITHUB_RUN_ATTEMPT:GITHUB_RUN_ID:GITHUB_RUN_NUMBER:JOB_STATUS"
# GitHub Actions allows you neither to use the env context for the job name
# nor to access the job name from the step context, so we have to
# duplicate it to work around this nonsense.
JOB_NAME: PG${{ matrix.pg }} ${{ matrix.build_type }} ${{ matrix.os }}
CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
JOB_STATUS: ${{ job.status }}
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]] ;
then
GITHUB_PR_NUMBER="${{ github.event.number }}"
else
GITHUB_PR_NUMBER=0
fi
export GITHUB_PR_NUMBER
scripts/upload_ci_stats.sh

scripts/upload_ci_stats.sh (new executable file, 192 lines)

@@ -0,0 +1,192 @@
#!/usr/bin/env bash
set -xue
if ! [ -e 'installcheck.log' ]
then
    # Probably the previous steps have failed and we have nothing to upload.
    echo "installcheck.log does not exist"
    exit 0
fi

if [ -z "${CI_STATS_DB:-}" ]
then
    # The secret with the stats db connection string is not accessible in forks.
    echo "The statistics database connection string is not specified"
    exit 0
fi
PSQL=(psql "${CI_STATS_DB}" -qtAX "--set=ON_ERROR_STOP=1")
# The tables we are going to use.
DESIRED_SCHEMA="
create extension if not exists timescaledb;
create table job(
    job_date timestamptz, -- Serves as a unique id.
    commit_sha text,
    job_name text,
    repository text,
    ref_name text,
    event_name text,
    pr_number int,
    job_status text,
    url text,
    run_attempt int,
    run_id bigint,
    run_number int
);
create unique index on job(job_date);
select create_hypertable('job', 'job_date');
create table test(
    job_date timestamptz,
    test_name text,
    test_status text,
    test_duration float
);
create unique index on test(job_date, test_name);
select create_hypertable('test', 'job_date');
create table log(
    job_date timestamptz,
    test_name text,
    log_contents text
);
create unique index on log(job_date, test_name);
select create_hypertable('log', 'job_date');
-- don't add a trailing newline because bash command substitution removes it"
DROP_QUERY="
drop table if exists test cascade;
drop table if exists job cascade;
drop table if exists log cascade;
"
# Recreate the tables if the schema changed.
EXISTING_SCHEMA=$("${PSQL[@]}" -c "
create table if not exists _schema(create_query text, drop_query text);
select create_query from _schema;
")
if ! [ "${EXISTING_SCHEMA}" == "${DESIRED_SCHEMA}" ];
then
"${PSQL[@]}" -v new_create="$DESIRED_SCHEMA" -v new_drop="$DROP_QUERY" <<<"
-- Run both the old and the new drop queries and ignore errors, to try to
-- bring the database into a predictable state even if it's current state is
-- incorrect (e.g. _schema doesn't actually match the existing tables).
\set ON_ERROR_STOP 0
select drop_query from _schema \gexec
:new_drop
\set ON_ERROR_STOP 1
-- Create new tables.
begin;
:new_create
truncate table _schema;
insert into _schema values (:'new_create', :'new_drop');
commit;
"
fi
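# The "select drop_query from _schema \gexec" line above relies on psql's
# \gexec metacommand, which runs the query and then executes each value of
# its result set as a separate SQL statement. A self-contained sketch of the
# mechanism (hypothetical table and values, not part of this script):
#
#   psql "$CI_STATS_DB" <<<"
#   create table if not exists pending(q text);
#   insert into pending values ('select 1'), ('select 2');
#   select q from pending \gexec
#   "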
# Create the job record.
COMMIT_SHA=$(git -C "$(dirname "${BASH_SOURCE[0]}")" rev-parse @)
export COMMIT_SHA
JOB_NAME="${JOB_NAME:-test-job}"
export JOB_NAME
JOB_DATE=$("${PSQL[@]}" -c "
insert into job values (
    now(), '$COMMIT_SHA', '$JOB_NAME',
    '$GITHUB_REPOSITORY', '$GITHUB_REF_NAME', '$GITHUB_EVENT_NAME',
    '$GITHUB_PR_NUMBER', '$JOB_STATUS',
    'https://github.com/timescale/timescaledb/actions/runs/$GITHUB_RUN_ID/attempts/$GITHUB_RUN_ATTEMPT',
    '$GITHUB_RUN_ATTEMPT', '$GITHUB_RUN_ID', '$GITHUB_RUN_NUMBER')
returning job_date;
")
export JOB_DATE
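# psql was invoked with -qtAX above, so the command substitution captures just
# the returned timestamptz; JOB_DATE is the key that ties the test and log
# rows below to this job record.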
# Split the regression.diffs into per-test files.
gawk '
    # A header line like "diff .../<test>.out" (or the ---/+++ lines)
    # selects a new per-test output file named <test>.diff.
    match($0, /^(diff|\+\+\+|\-\-\-) .*\/(.*)[.]out/, a) {
        file = a[2] ".diff";
        next;
    }
    # Append everything else to the current file, once one is known.
    { if (file) print $0 > file; }
' regression.log
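# For example, if regression.log contains concatenated pg_regress diffs like
# this sketch (not verbatim CI output):
#
#   diff -u .../expected/copy.out .../results/copy.out
#   --- .../expected/copy.out
#   +++ .../results/copy.out
#   @@ -1,3 +1,3 @@
#    select 1;
#   -1
#   +2
#
# then each header line switches the output file to copy.diff (the headers
# themselves are skipped), and the hunk lines that follow are appended to it.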
# Snip the long sequences of "+" or "-" changes in the diffs.
for x in *.diff
do
    if ! [ -e "$x" ] ; then continue ; fi
    gawk -v max_context_lines=10 -v min_context_lines=2 '
        # Classify each line as removal, addition, or context.
        /^-/     { new_sign = "-" }
        /^[+]/   { new_sign = "+" }
        /^[^+-]/ { new_sign = " " }
        {
            if (old_sign != new_sign) {
                # The run of same-sign lines ended: print the buffered tail,
                # replacing the dropped middle with a "lines skipped" marker.
                to_print = lines_buffered > max_context_lines ? min_context_lines : lines_buffered;
                if (lines_buffered > to_print)
                    print "<" lines_buffered - to_print " lines skipped>";
                for (i = 0; i < to_print; i++) {
                    print buf[(NR + i - to_print) % max_context_lines]
                }
                printf("c %04d: %s\n", NR, $0);
                old_sign = new_sign;
                lines_printed = 0;
                lines_buffered = 0;
            } else {
                if (lines_printed >= min_context_lines) {
                    # Past the leading context of this run: only buffer.
                    lines_buffered++;
                    buf[NR % max_context_lines] = sprintf("b %04d: %s", NR, $0)
                } else {
                    lines_printed++;
                    printf("p %04d: %s\n", NR, $0);
                }
            }
        }
        END {
            # Flush the buffer for the final run of lines.
            to_print = lines_buffered > max_context_lines ? min_context_lines : lines_buffered;
            if (lines_buffered > to_print)
                print "<" lines_buffered - to_print " lines skipped>";
            for (i = 0; i < to_print; i++) {
                print buf[(NR + 1 + i - to_print) % max_context_lines]
            }
        }' "$x" > "$x.tmp"
    mv "$x.tmp" "$x"
done
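# A sketch of the effect, with min_context_lines=2 and max_context_lines=10:
# a run of 100 consecutive '+' lines followed by one context line comes out
# roughly as
#
#   c 0001: +first added line      (the sign change that starts the run)
#   p 0002: +second added line
#   p 0003: +third added line
#   <95 lines skipped>
#   b 0099: +second-to-last added line
#   b 0100: +last added line
#   c 0101:  context line that ends the run
#
# so only a few lines at each edge of a long run survive.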
# Parse the installcheck.log to find the individual test results.
gawk -v OFS='\t' '
    match($0, /^(test| ) ([^ ]+)[ ]+\.\.\.[ ]+([^ ]+) (|\(.*\))[ ]+([0-9]+) ms$/, a) {
        print ENVIRON["JOB_DATE"], a[2], tolower(a[3] (a[4] ? (" " a[4]) : "")), a[5];
    }
' installcheck.log > tests.tsv
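# For example, a pg_regress result line such as this sketch
#
#   test copy                     ... ok          123 ms
#
# becomes the tab-separated row "<JOB_DATE>  copy  ok  123", and a line
# ending in "FAILED (ignored)   52 ms" is stored with the status
# "failed (ignored)".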
# Save the test results into the database.
"${PSQL[@]}" -c "\copy test from tests.tsv"
# Upload the logs.
for x in {sanitizer,stacktrace,postgres-failure}.log *.diff
do
    if ! [ -e "$x" ]; then continue ; fi
    "${PSQL[@]}" <<<"
\set contents \`cat $x\`
insert into log values ('$JOB_DATE', '$(basename "$x" .diff)', :'contents');
"
done
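# The script can also be exercised outside CI; a hypothetical local run,
# assuming a reachable stats database and an installcheck.log in the
# current directory:
#
#   export CI_STATS_DB='postgres://user:password@host/db'  # assumed format
#   export GITHUB_REPOSITORY=timescale/timescaledb GITHUB_REF_NAME=main \
#       GITHUB_EVENT_NAME=push GITHUB_PR_NUMBER=0 GITHUB_RUN_ATTEMPT=1 \
#       GITHUB_RUN_ID=0 GITHUB_RUN_NUMBER=0 JOB_STATUS=success
#   ./scripts/upload_ci_stats.sh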

@@ -8,8 +8,8 @@ set(TEST_TEMPLATES_MODULE_DEBUG
remote_create_chunk.spec.in
dist_restore_point.spec.in
dist_cmd_exec.spec.in
cagg_drop_chunks.spec.in
telemetry.spec.in
cagg_drop_chunks_iso.spec.in
telemetry_iso.spec.in
compression_chunk_race.spec.in
compression_merge_race.spec.in
decompression_chunk_and_parallel_query.in
@@ -31,9 +31,9 @@ endif()
list(
APPEND
TEST_FILES
compression_ddl.spec
compression_ddl_iso.spec
cagg_insert.spec
cagg_multi.spec
cagg_multi_iso.spec
cagg_concurrent_refresh.spec
cagg_concurrent_refresh_dist_ht.spec
deadlock_drop_chunks_compress.spec)

@@ -20,6 +20,9 @@ setup {
teardown {
DROP TABLE compress;
DROP TABLE compressed_chunks;
DROP TABLE decompressed_chunks;
DROP TABLE telemetry;
}
session "s1"

@@ -93,7 +93,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
remote_txn.sql
transparent_decompression_queries.sql
tsl_tables.sql
license.sql)
license_tsl.sql)
if(USE_TELEMETRY)
list(APPEND TEST_FILES telemetry_stats.sql)
endif()