diff --git a/.github/workflows/cron-tests.yaml b/.github/workflows/cron-tests.yaml index 2275d5109..b4ff5b9ad 100644 --- a/.github/workflows/cron-tests.yaml +++ b/.github/workflows/cron-tests.yaml @@ -33,22 +33,6 @@ jobs: run: | ./scripts/docker-run-abi-test.sh - memory_leak: - name: Memory leak on insert - runs-on: ubuntu-latest - strategy: - fail-fast: false - env: - PG_VERSION: 12.8 - - steps: - - name: Checkout TimescaleDB - uses: actions/checkout@v2 - - - name: Memory test - run: | - ./scripts/docker-run-memory-test.sh - backup_and_restore: name: Backup and restore runs-on: ubuntu-latest diff --git a/.github/workflows/memory-tests.yaml b/.github/workflows/memory-tests.yaml new file mode 100644 index 000000000..0a000b344 --- /dev/null +++ b/.github/workflows/memory-tests.yaml @@ -0,0 +1,65 @@ +name: Memory tests +on: + schedule: + # run daily 20:00 on master branch + - cron: '0 20 * * *' + push: + branches: + - prerelease_test + - memory_test +jobs: + memory_leak: + name: Memory leak on insert PG${{ matrix.pg }} + runs-on: ubuntu-20.04 + strategy: + matrix: + pg: [12, 13, 14] + fail-fast: false + + steps: + - name: Install Dependencies + run: | + sudo apt-get update + sudo apt-get install gnupg systemd-coredump gdb postgresql-common libkrb5-dev python3-psutil + yes | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh + sudo apt-get update + sudo apt-get install postgresql-${{ matrix.pg }} postgresql-server-dev-${{ matrix.pg }} + + - name: Checkout TimescaleDB + uses: actions/checkout@v2 + + - name: Build TimescaleDB + run: | + ./bootstrap -DCMAKE_BUILD_TYPE=Release + make -C build + sudo make -C build install + + - name: Setup database + run: | + sudo tee -a /etc/postgresql/${{ matrix.pg }}/main/postgresql.conf <<-CONF + shared_preload_libraries = 'timescaledb' + max_worker_processes = 0 + log_destination = syslog + max_wal_size = 8GB + max_wal_senders = 0 + wal_level = minimal + checkpoint_timeout = 20min + log_checkpoints = on + bgwriter_lru_maxpages = 0 + track_counts = off + fsync = off + port = 5432 + CONF + sudo grep port /etc/postgresql/${{ matrix.pg }}/main/postgresql.conf + sudo systemctl start postgresql@${{ matrix.pg }}-main.service + sudo -u postgres psql -X -c "CREATE USER runner SUPERUSER LOGIN;" + + - name: Run memory test + run: | + sudo -u postgres python ./scripts/test_memory_spikes.py & sleep 5 && psql -d postgres -v ECHO=all -X -f scripts/out_of_order_random_direct.sql + + - name: Postgres log + if: always() + run: | + sudo journalctl -u postgresql@${{ matrix.pg }}-main.service + diff --git a/scripts/docker-run-memory-test.sh b/scripts/docker-run-memory-test.sh deleted file mode 100755 index 6791c4643..000000000 --- a/scripts/docker-run-memory-test.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash - -set -e -set -o pipefail - -SCRIPT_DIR=$(dirname $0) -BASE_DIR=${PWD}/${SCRIPT_DIR}/.. -DO_CLEANUP=true - -while getopts "d" opt; -do - case $opt in - d) - DO_CLEANUP=false - echo "!!Debug mode: Containers and temporary directory will be left on disk" - echo - ;; - esac -done - -shift $((OPTIND-1)) - -if "$DO_CLEANUP" = "true"; then - trap cleanup EXIT -fi - -cleanup() { - # Save status here so that we can return the status of the last - # command in the script and not the last command of the cleanup - # function - status="$?" - set +e # do not exit immediately on failure in cleanup handler - # docker rm -vf timescaledb-valgrind 2>/dev/null - docker rm -vf timescaledb-memory 2>/dev/null - echo "Exit status is $status" - exit $status -} - -docker_exec() { - # Echo to stderr - >&2 echo -e "\033[1m$1\033[0m: $2" - docker exec $1 /bin/bash -c "$2" -} - -wait_for_pg() { - set +e - for i in {1..10}; do - sleep 2 - - docker_exec $1 "pg_isready -U postgres" - - if [[ $? == 0 ]] ; then - # this makes the test less flaky, although not - # ideal. Apperently, pg_isready is not always a good - # indication of whether the DB is actually ready to accept - # queries - sleep 5 - set -e - return 0 - fi - done - exit 1 -} - - -docker rm -f timescaledb-memory 2>/dev/null || true -IMAGE_NAME=memory_test TAG_NAME=latest bash ${SCRIPT_DIR}/docker-build.sh - -# The odd contortion with the BASE_DIR is necessary since SCRIPT_DIR -# is relative and --volume requires an absolute path. -docker run --env TIMESCALEDB_TELEMETRY=off -d \ - --volume ${BASE_DIR}/scripts:/mnt/scripts \ - --name timescaledb-memory memory_test:latest -wait_for_pg timescaledb-memory - -echo "**** Installing python3 and psutil ****" -docker_exec timescaledb-memory "apk add --no-cache python3 && python3 -m ensurepip && pip3 install --upgrade pip && apk add --update build-base python3-dev py-psutil" - -echo "**** Testing ****" -docker_exec timescaledb-memory "python3 /mnt/scripts/test_memory_spikes.py & sleep 5 && psql -U postgres -d postgres -h localhost -v ECHO=all -X -f /mnt/scripts/out_of_order_random_direct.sql" diff --git a/scripts/test_memory_spikes.py b/scripts/test_memory_spikes.py index f4783a9c2..72997fd2d 100644 --- a/scripts/test_memory_spikes.py +++ b/scripts/test_memory_spikes.py @@ -8,9 +8,13 @@ import psutil import time import sys +from datetime import datetime DEFAULT_MEMCAP = 300 # in MB THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike +WAIT_TO_STABILIZE = 30 # wait in seconds before considering memory stable +CHECK_INTERVAL = 15 +DEBUG = False # finds processes with name as argument def find_procs_by_name(name): @@ -39,32 +43,33 @@ def bytes2human(n): return "%sB" % n # prints pid of processes -def print_pid(process): - if not process: - return - for p in process: - print(p.pid, end=" ") - print() - return +def process_details(process): + return "{} {}".format(process.pid, ''.join(process.cmdline()).strip()) + +def process_stats(): + processes = find_procs_by_name('postgres') + for p in processes: + print(p, p.num_ctx_switches(), p.cpu_times(), p.memory_info(), flush=True) # return process id of new postgres process created when running SQL file def find_new_process(): # get postgres processes that are running before insertion starts base_process = find_procs_by_name('postgres') print('Processes running before inserts run: ') - print_pid(base_process) + for p in base_process: + print(process_details(p)) process_count = len(base_process) - print("Waiting 30 seconds for process running inserts to start") - time.sleep(30) # wait 30 seconds to get process that runs the inserts + print("Waiting {} seconds for process running inserts to start".format(WAIT_TO_STABILIZE), flush=True) + time.sleep(WAIT_TO_STABILIZE) # wait 30 seconds to get process that runs the inserts # continuously check for creation of new postgres process timeout = time.time() + 60 while True: # prevent infinite loop if time.time() > timeout: - print('Timed out on finding new process, should force quit SQL inserts') + print('Timed out on finding new process, should force quit SQL inserts', flush=True) sys.exit(4) process = find_procs_by_name('postgres') @@ -77,10 +82,10 @@ def find_new_process(): # We assume that the backend is the first 'new' process to start, so it will have # the lower PID for p in difference_set: - print('found process: {}'.format(p)) + print('found process: {}'.format(process_details(p))) if new_process is None or p.pid < new_process.pid: new_process = p - print('new_process: {}'.format(new_process)) + print('new_process: {}'.format(process_details(new_process))) return new_process.pid time.sleep(1) @@ -91,8 +96,8 @@ def main(): print('*** Check this pid is the same as "pg_backend_pid" from SQL command ***') print('New process running random inserts:', pid) - print('Waiting 1 minute for memory consumption to stabilize') - time.sleep(60) + print('Waiting {} seconds for memory consumption to stabilize'.format(WAIT_TO_STABILIZE), flush=True) + time.sleep(WAIT_TO_STABILIZE) # Calculate average memory consumption from 5 values over 15 seconds sum = 0 @@ -100,7 +105,7 @@ def main(): sum += p.memory_info().rss time.sleep(3) avg = sum / 5 - print('Average memory consumption: ', bytes2human(avg)) + print('Average memory consumption: ', bytes2human(avg), flush=True) cap = int(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MEMCAP) * 1024 * 1024 upper_threshold = min(cap, avg * THRESHOLD_RATIO) @@ -119,17 +124,20 @@ def main(): sys.exit(4) rss = p.memory_info().rss - print('Memory used by process ' + str(p.pid) + ': ' + bytes2human(rss)) + stamp = datetime.now().strftime("%H:%M:%S") + print('{} Memory used by process {}: {}'.format(stamp, p.pid, bytes2human(rss)), flush=True) + if DEBUG: + process_stats() # exit with error if memory above threshold if rss > upper_threshold: print('Memory consumption exceeded upper threshold') - print('Killing postgres process') + print('Killing postgres process', flush=True) p.kill() sys.exit(4) - time.sleep(30) + time.sleep(CHECK_INTERVAL) - print('No memory errors detected with out of order random inserts') + print('No memory errors detected with out of order random inserts', flush=True) sys.exit(0) # success if __name__ == '__main__':