1
0
mirror of https://github.com/timescale/timescaledb.git synced 2025-05-17 11:03:36 +08:00

Log internal program errors to CI database ()

SQLSmith finds many internal program errors (`elog`, code `XX000`).
Normally these errors shouldn't be triggered by user actions and
indicate a bug in the program (like `variable not found in subplan
targetlist`). We don't have the capacity to fix all of them currently,
especially since some of them seem to be upstream issues. This commit
adds logging for these errors so that we can at least study the current
situation.
This commit is contained in:
Alexander Kuzmenkov 2024-02-09 13:28:54 +01:00 committed by GitHub
parent 96aa7e0fb9
commit d93aa5c8c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 114 additions and 46 deletions

@ -134,8 +134,8 @@ jobs:
id: collectlogs
run: |
sudo chmod a+rw .
sudo find . -name regression.diffs -exec cat {} + > regression.log
sudo find . -name postmaster.log -exec cat {} + > postgres.log
sudo find build_abi -name regression.diffs -exec cat {} + > regression.log
sudo find build_abi -name postmaster.log -exec cat {} + > postmaster.log
if [[ -s regression.log ]]; then echo "regression_diff=true" >>$GITHUB_OUTPUT; fi
grep -e 'FAILED' -e 'failed (ignored)' -e 'not ok' installcheck.log || true
cat regression.log
@ -152,5 +152,5 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log ABI Breakage ${{ matrix.dir }} PG${{ matrix.pg }}
path: postgres.log
path: postmaster.log

@ -28,7 +28,7 @@ jobs:
# If needed, install them before opening the core dump.
sudo apt-get update
sudo apt-get install 7zip clang lld llvm flex bison libipc-run-perl \
libtest-most-perl tree
libtest-most-perl tree jq
- name: Checkout TimescaleDB
uses: actions/checkout@v4
@ -134,6 +134,7 @@ jobs:
env:
PG_SRC_DIR: pgbuild
PG_INSTALL_DIR: postgresql
JOB_NAME: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}
steps:
- name: Install Linux dependencies
@ -204,7 +205,9 @@ jobs:
export PGPORT=5432
export PGDATABASE=postgres
export PATH=$HOME/$PG_INSTALL_DIR/bin:$PATH
pg_ctl -l postgres.log start
pg_ctl -o "-clogging_collector=true" -o "-clog_destination=jsonlog,stderr" \
-o "-clog_directory=$(readlink -f .)" -o "-clog_filename=postmaster.log" \
-o "-clog_error_verbosity=verbose" start
psql -c "create extension timescaledb;"
@ -265,7 +268,7 @@ jobs:
[ $errors -eq 0 ] || exit 1
# Shouldn't have any WARNINGS in the log.
! grep -F "] WARNING: " postgres.log
! grep -F "] WARNING: " postmaster.log
# Check that the server is still alive.
psql -c "select 1"
@ -285,7 +288,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log for ${{ steps.config.outputs.name }}
path: postgres.log
# The server is started with log_filename=postmaster.log and jsonlog enabled,
# so the files to collect are postmaster.log and postmaster.json.
path: postmaster.*
- name: Save fuzzer-generated crash cases
if: always()
@ -350,7 +353,6 @@ jobs:
bt full
" 2>&1 | tee stacktrace.log
./scripts/bundle_coredumps.sh
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
exit 1 # Fail the job if we have core dumps.
- name: Upload core dumps
@ -359,3 +361,24 @@ jobs:
with:
name: Coredumps for ${{ steps.config.outputs.name }}
path: coredumps
- name: Upload test results to the database
if: always()
env:
CI_STATS_DB: ${{ secrets.CI_STATS_DB }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
JOB_STATUS: ${{ job.status }}
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]] ;
then
GITHUB_PR_NUMBER="${{ github.event.number }}"
else
GITHUB_PR_NUMBER=0
fi
export GITHUB_PR_NUMBER
scripts/upload_ci_stats.sh

@ -142,8 +142,8 @@ jobs:
id: collectlogs
shell: bash
run: |
find . -name regression.diffs -exec cat {} + > regression.log
find . -name postmaster.log -exec cat {} + > postgres.log
find build -name regression.diffs -exec cat {} + > regression.log
find build -name postmaster.log -exec cat {} + > postmaster.log
if [[ -s regression.log ]]; then echo "regression_diff=true" >>$GITHUB_OUTPUT; fi
grep -e 'FAILED' -e 'failed (ignored)' -e 'not ok' installcheck.log || true
cat regression.log
@ -205,7 +205,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log linux-i386 PG${{ matrix.pg }}
path: postgres.log
path: postmaster.log
- name: Save TAP test logs
if: always()

@ -144,7 +144,8 @@ jobs:
run: |
BUILD_DIR=nossl ./bootstrap -DCMAKE_BUILD_TYPE=Debug \
-DPG_SOURCE_DIR=~/$PG_SRC_DIR -DPG_PATH=~/$PG_INSTALL_DIR \
${{ matrix.tsdb_build_args }} -DCODECOVERAGE=${{ matrix.coverage }} -DUSE_OPENSSL=OFF
${{ matrix.tsdb_build_args }} -DCODECOVERAGE=${{ matrix.coverage }} -DUSE_OPENSSL=OFF \
-DTEST_PG_LOG_DIRECTORY="$(readlink -f .)"
make -j $MAKE_JOBS -C nossl
make -C nossl install
make -C nossl regresscheck TESTS=telemetry
@ -153,7 +154,8 @@ jobs:
run: |
./bootstrap -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DPG_SOURCE_DIR=~/$PG_SRC_DIR -DPG_PATH=~/$PG_INSTALL_DIR \
${{ matrix.tsdb_build_args }} -DCODECOVERAGE=${{ matrix.coverage }}
${{ matrix.tsdb_build_args }} -DCODECOVERAGE=${{ matrix.coverage }} \
-DTEST_PG_LOG_DIRECTORY="$(readlink -f .)"
make -j $MAKE_JOBS -C build
make -C build install
@ -195,7 +197,7 @@ jobs:
id: collectlogs
run: |
find . -name regression.diffs -exec cat {} + > regression.log
find . -name postmaster.log -exec cat {} + > postgres.log
if [[ "${{ runner.os }}" == "Linux" ]] ; then
# wait in case there are in-progress coredumps
sleep 10
@ -223,7 +225,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }}
path: postgres.log
path: postmaster.*
- name: Stack trace Linux
if: always() && steps.collectlogs.outputs.coredumps == 'true' && runner.os == 'Linux'
@ -248,7 +250,6 @@ jobs:
info locals
" 2>&1 | tee stacktrace.log
./scripts/bundle_coredumps.sh
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
- name: Stack trace macOS
if: always() && steps.collectlogs.outputs.coredumps == 'true' && runner.os == 'macOS'
@ -275,7 +276,7 @@ jobs:
# Don't upload the results of the flaky check, because the db schema only
# supports running one test once per job. Also disable uploading of the
# tests on macOS till the next MINOR release of PG
if: always() && (! contains(matrix.name, 'Flaky')) && (! runner.os == 'macOS')
if: always() && (! contains(matrix.name, 'Flaky')) && ( runner.os != 'macOS' )
env:
# GitHub Actions allow you neither to use the env context for the job name,
# nor to access the job name from the step context, so we have to

@ -140,7 +140,8 @@ jobs:
- name: Build TimescaleDB
run: |
./bootstrap -DCMAKE_BUILD_TYPE=Debug -DPG_SOURCE_DIR=~/$PG_SRC_DIR \
-DPG_PATH=~/$PG_INSTALL_DIR -DCODECOVERAGE=OFF -DREQUIRE_ALL_TESTS=ON -DTEST_GROUP_SIZE=5
-DPG_PATH=~/$PG_INSTALL_DIR -DCODECOVERAGE=OFF -DREQUIRE_ALL_TESTS=ON \
-DTEST_GROUP_SIZE=5 -DTEST_PG_LOG_DIRECTORY="$(readlink -f .)"
make -j$(nproc) -C build
make -C build install
@ -157,7 +158,6 @@ jobs:
id: collectlogs
run: |
find . -name regression.diffs -exec cat {} + > regression.log
find . -name postmaster.log -exec cat {} + > postgres.log
if [[ "${{ runner.os }}" == "Linux" ]] ; then
# wait in case there are in-progress coredumps
sleep 10
@ -183,7 +183,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
path: postgres.log
path: postmaster.*
- name: Stack trace
if: always() && steps.collectlogs.outputs.coredumps == 'true'
@ -202,7 +202,6 @@ jobs:
bt full
" 2>&1 | tee stacktrace.log
./scripts/bundle_coredumps.sh
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
- name: Show sanitizer logs
if: always()

@ -34,7 +34,7 @@ jobs:
sudo apt-get update
sudo apt-get install gnupg systemd-coredump gdb postgresql-common \
libkrb5-dev build-essential autoconf autoconf-archive \
libboost-regex-dev libsqlite3-dev
libboost-regex-dev libsqlite3-dev jq
yes | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
sudo apt-get purge postgresql*
@ -75,7 +75,9 @@ jobs:
/usr/lib/postgresql/${{ matrix.pg }}/bin/pg_ctl -D ~/pgdata start \
-o "-cshared_preload_libraries=timescaledb" -o "-cmax_connections=200" \
-o "-cmax_prepared_transactions=100" -o "-cunix_socket_directories=/tmp" \
-o "-clog_statement=all" -l postgres.log
-o "-clog_statement=all" -o "-clogging_collector=true" \
-o "-clog_destination=jsonlog,stderr" -o "-clog_directory=$(readlink -f .)" \
-o "-clog_error_verbosity=verbose" -o "-clog_filename=postmaster.log"
psql -h /tmp postgres -c 'CREATE DATABASE smith;'
psql -h /tmp smith -c 'CREATE EXTENSION timescaledb;'
psql -h /tmp smith -c '\i ${{ github.workspace }}/tsl/test/shared/sql/include/shared_setup.sql'
@ -100,8 +102,6 @@ jobs:
2>&1 | tee -a sqlsmith.log
psql "host=/tmp dbname=smith" -c "select 1"
truncate --size=0 postgres.log
done
- name: Check for coredumps
@ -146,7 +146,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log for PG${{ matrix.pg }}
path: postgres.log
# The server is started with log_filename=postmaster.log and jsonlog enabled,
# so the files to collect are postmaster.log and postmaster.json.
path: postmaster.*
- name: Upload test results to the database
if: always()

@ -149,7 +149,7 @@ jobs:
copy build_win/test/postgresql.conf ${{ env.PGDATA }}
copy build_win/test/pg_hba.conf ${{ env.PGDATA }}
icacls . /grant runneradmin:F /T
~/PostgreSQL/${{ matrix.pg }}/bin/pg_ctl start -o "${{ matrix.pg_config }}" --log=postgres.log
~/PostgreSQL/${{ matrix.pg }}/bin/pg_ctl start -o "${{ matrix.pg_config }}" --log=postmaster.log
~/PostgreSQL/${{ matrix.pg }}/bin/pg_isready -U postgres -d postgres --timeout=30
~/PostgreSQL/${{ matrix.pg }}/bin/psql -U postgres -d postgres -c 'CREATE USER root SUPERUSER LOGIN;'
echo "PG version:"
@ -191,7 +191,7 @@ jobs:
~/PostgreSQL/${{ matrix.pg }}/bin/initdb -U postgres -A trust --locale=en_US --encoding=UTF8
copy build_win/tsl/test/postgresql.conf ${{ env.PGDATA }}
copy build_win/tsl/test/pg_hba.conf ${{ env.PGDATA }}
~/PostgreSQL/${{ matrix.pg }}/bin/pg_ctl start -o "${{ matrix.pg_config }}" --log="postgres.log"
~/PostgreSQL/${{ matrix.pg }}/bin/pg_ctl start -o "${{ matrix.pg_config }}" --log="postmaster.log"
~/PostgreSQL/${{ matrix.pg }}/bin/pg_isready -U postgres -d postgres --timeout=30
~/PostgreSQL/${{ matrix.pg }}/bin/psql -U postgres -d postgres -c 'CREATE USER root SUPERUSER LOGIN;'
@ -237,7 +237,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: PostgreSQL ${{ matrix.pg }} log ${{ matrix.os }} ${{ matrix.build_type }} Build
path: postgres.log
path: postmaster.log
- name: Upload test results to the database
if: always()

@ -56,6 +56,15 @@ create table log(
create unique index on log(job_date, test_name);
select create_hypertable('log', 'job_date');
create table ipe(
job_date timestamptz,
error text,
location text,
statement text
);
select create_hypertable('ipe', 'job_date');
"
# Create the job record.
@ -149,6 +158,15 @@ do
mv "$x.tmp" "$x"
done
# Save a snippet of logs where a backend was terminated by signal: every
# matching line plus 40 lines of context on each side. The trailing `||:`
# makes this best-effort, so a missing log or no match does not abort the
# script.
grep -C40 "was terminated by signal" postmaster.log > postgres-failure.log ||:
# Find internal program errors in Postgres logs: entries of the JSON log with
# SQLSTATE XX000 whose severity is anything other than LOG. Each match is
# emitted as one tab-separated row of (JOB_DATE from the environment, message,
# reporting function, statement), which is the column order of the `ipe` table.
# NOTE(review): postmaster.json is presumably only produced when the server
# runs with the jsonlog destination; otherwise jq fails, the failure is
# ignored by `||:`, and ipe.tsv is left empty.
jq 'select(.state_code == "XX000" and .error_severity != "LOG")
| [env.JOB_DATE, .message, .func_name, .statement] | @tsv
' -r postmaster.json > ipe.tsv ||:
# Load the extracted rows into the `ipe` table of the CI database.
"${PSQL[@]}" -c "\copy ipe from ipe.tsv"
# Upload the logs.
# Note that the sanitizer setting log_path means "write logs to 'log_path.pid'".
for x in sanitizer* sanitizer/* {sqlsmith/sqlsmith,sanitizer,stacktrace,postgres-failure}.log *.diff

@ -1,20 +1,27 @@
# NOTE: any changes here require changes to tsl/test/postgresql.conf. Its prefix
# must be the same as this file.
shared_preload_libraries=timescaledb
max_worker_processes=24
autovacuum=false
random_page_cost=1.0
timezone='US/Pacific'
datestyle='Postgres, MDY'
timescaledb.license='apache'
@TELEMETRY_DEFAULT_SETTING@
hba_file='@TEST_PG_HBA_FILE@'
log_destination='@TEST_PG_LOG_DESTINATION@'
log_directory='@TEST_PG_LOG_DIRECTORY@'
log_filename='postmaster.log'
log_line_prefix='%m: %u [%p] %d '
logging_collector=true
max_worker_processes=24
random_page_cost=1.0
shared_preload_libraries=timescaledb
timescaledb.last_tuned='1971-02-03 04:05:06.789012 -0300'
timescaledb.last_tuned_version='0.0.1'
timescaledb.passfile='@TEST_PASSFILE@'
timescaledb_telemetry.cloud='ci'
log_line_prefix='%m: %u [%p] %d '
timezone='US/Pacific'
# Set extra_float_digits=0 to retain the pre PG12 rounding behaviour
# of floating point numbers, which are needed to make our tests work.
extra_float_digits=0
timescaledb.passfile='@TEST_PASSFILE@'
hba_file='@TEST_PG_HBA_FILE@'
@TELEMETRY_DEFAULT_SETTING@
timescaledb.license='apache'

@ -52,6 +52,19 @@ endif()
configure_file(${PRIMARY_TEST_DIR}/pg_hba.conf.in pg_hba.conf)
set(TEST_PG_HBA_FILE ${TEST_OUTPUT_DIR}/pg_hba.conf)
# Enable json logs that are supported since PG15, to get additional information
# about errors from them for CI database. Older versions only log to stderr.
if(PG_VERSION_MAJOR LESS 15)
  set(TEST_PG_LOG_DESTINATION stderr)
else()
  set(TEST_PG_LOG_DESTINATION jsonlog,stderr)
endif()

# Log directory substituted into the test postgresql.conf templates.
#
# This variable is set differently in CI. We use it to save the logs outside the
# tmp instance, because it is deleted by pg_regress on successful test
# completion, and we want to run some additional checks on the logs in any case.
#
# This has to be a STRING cache entry rather than option(): option() only
# creates BOOL entries, so a non-boolean default such as "log" would be stored
# as OFF and the servers would be configured with log_directory='OFF'. A value
# passed as -DTEST_PG_LOG_DIRECTORY=<dir> on the command line still takes
# precedence over the default.
set(TEST_PG_LOG_DIRECTORY
    "log"
    CACHE STRING "Log directory for regression tests")
if(USE_TELEMETRY)
set(TELEMETRY_DEFAULT_SETTING "timescaledb.telemetry_level=off")
else()

@ -1,20 +1,27 @@
# This section has to be equivalent to test/postgresql.conf
shared_preload_libraries=timescaledb
max_worker_processes=24
autovacuum=false
random_page_cost=1.0
timezone='US/Pacific'
datestyle='Postgres, MDY'
@TELEMETRY_DEFAULT_SETTING@
hba_file='@TEST_PG_HBA_FILE@'
log_destination='@TEST_PG_LOG_DESTINATION@'
log_directory='@TEST_PG_LOG_DIRECTORY@'
log_filename='postmaster.log'
log_line_prefix='%m: %u [%p] %d '
logging_collector=true
max_worker_processes=24
random_page_cost=1.0
shared_preload_libraries=timescaledb
timescaledb.last_tuned='1971-02-03 04:05:06.789012 -0300'
timescaledb.last_tuned_version='0.0.1'
timescaledb.passfile='@TEST_PASSFILE@'
timescaledb_telemetry.cloud='ci'
log_line_prefix='%m %u [%p] %d '
timezone='US/Pacific'
# Set extra_float_digits=0 to retain the pre PG12 rounding behaviour
# of floating point numbers, which are needed to make our tests work.
extra_float_digits=0
timescaledb.passfile='@TEST_PASSFILE@'
hba_file='@TEST_PG_HBA_FILE@'
@TELEMETRY_DEFAULT_SETTING@
# This section adds additional options required by TSL.
timescaledb.license='timescale'