Improve memory tests

This patch implements various improvements to the memory tests.
Memory tests are split off into a separate workflow to allow running
them separately from prerelease tests by pushing to the branch
memory_test. The scripts are adjusted to no longer build docker
images to run the tests, but instead to run the tests directly.
The python script to monitor memory usage was improved and now
flushes after printing to make output appear consistently in CI.
This patch also changes the memory test to run on PG 12, 13, and 14
instead of only on PG 12.
This commit is contained in:
Sven Klemm 2021-10-25 12:48:00 +02:00 committed by Sven Klemm
parent b27c9cbd47
commit a9e00497a2
4 changed files with 93 additions and 116 deletions

View File

@ -33,22 +33,6 @@ jobs:
run: | run: |
./scripts/docker-run-abi-test.sh ./scripts/docker-run-abi-test.sh
memory_leak:
name: Memory leak on insert
runs-on: ubuntu-latest
strategy:
fail-fast: false
env:
PG_VERSION: 12.8
steps:
- name: Checkout TimescaleDB
uses: actions/checkout@v2
- name: Memory test
run: |
./scripts/docker-run-memory-test.sh
backup_and_restore: backup_and_restore:
name: Backup and restore name: Backup and restore
runs-on: ubuntu-latest runs-on: ubuntu-latest

65
.github/workflows/memory-tests.yaml vendored Normal file
View File

@ -0,0 +1,65 @@
name: Memory tests
on:
  schedule:
    # run daily 20:00 on master branch
    - cron: '0 20 * * *'
  push:
    branches:
      - prerelease_test
      - memory_test

jobs:
  memory_leak:
    name: Memory leak on insert PG${{ matrix.pg }}
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        pg: [12, 13, 14]
      fail-fast: false

    steps:
      - name: Install Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install gnupg systemd-coredump gdb postgresql-common libkrb5-dev python3-psutil
          # enable the PGDG apt repository to get the requested PG major version
          yes | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
          sudo apt-get update
          sudo apt-get install postgresql-${{ matrix.pg }} postgresql-server-dev-${{ matrix.pg }}

      - name: Checkout TimescaleDB
        uses: actions/checkout@v2

      - name: Build TimescaleDB
        run: |
          ./bootstrap -DCMAKE_BUILD_TYPE=Release
          make -C build
          sudo make -C build install

      - name: Setup database
        run: |
          # max_worker_processes = 0 keeps background workers from skewing the
          # per-backend memory measurements; logging goes to syslog so the
          # "Postgres log" step can pull it out of journald.
          sudo tee -a /etc/postgresql/${{ matrix.pg }}/main/postgresql.conf <<-CONF
          shared_preload_libraries = 'timescaledb'
          max_worker_processes = 0
          log_destination = syslog
          max_wal_size = 8GB
          max_wal_senders = 0
          wal_level = minimal
          checkpoint_timeout = 20min
          log_checkpoints = on
          bgwriter_lru_maxpages = 0
          track_counts = off
          fsync = off
          port = 5432
          CONF
          sudo grep port /etc/postgresql/${{ matrix.pg }}/main/postgresql.conf
          sudo systemctl start postgresql@${{ matrix.pg }}-main.service
          sudo -u postgres psql -X -c "CREATE USER runner SUPERUSER LOGIN;"

      - name: Run memory test
        run: |
          # Start the memory monitor in the background, give it 5s to find the
          # backend, then run the insert workload it watches.
          # NOTE: psutil was installed via python3-psutil, so the monitor must
          # run under python3 (bare `python` may be missing or be python2).
          sudo -u postgres python3 ./scripts/test_memory_spikes.py & sleep 5 && psql -d postgres -v ECHO=all -X -f scripts/out_of_order_random_direct.sql

      - name: Postgres log
        if: always()
        run: |
          sudo journalctl -u postgresql@${{ matrix.pg }}-main.service

View File

@ -1,80 +0,0 @@
#!/usr/bin/env bash

# Run the memory leak test inside a docker container.
#
# Usage: docker-run-memory-test.sh [-d]
#   -d  debug mode: leave the container and temporary directory on disk on exit.

set -e
set -o pipefail

SCRIPT_DIR=$(dirname "$0")
# --volume needs an absolute path later on, so anchor the relative
# SCRIPT_DIR at ${PWD}.
BASE_DIR=${PWD}/${SCRIPT_DIR}/..
DO_CLEANUP=true

while getopts "d" opt;
do
    case $opt in
        d)
            DO_CLEANUP=false
            echo "!!Debug mode: Containers and temporary directory will be left on disk"
            echo
            ;;
    esac
done
shift $((OPTIND-1))

# BUGFIX: the original `if "$DO_CLEANUP" = "true"; then` executed the
# `true`/`false` *binaries* (which ignore their arguments) and only worked by
# accident; use a proper string comparison instead.
if [ "$DO_CLEANUP" = "true" ]; then
    trap cleanup EXIT
fi
# EXIT-trap handler: remove the test container and exit with the status of
# the last command that ran before the trap fired.
cleanup() {
    # Save status here so that we can return the status of the last
    # command in the script and not the last command of the cleanup
    # function
    status="$?"
    set +e # do not exit immediately on failure in cleanup handler
    docker rm -vf timescaledb-memory 2>/dev/null
    echo "Exit status is $status"
    exit $status
}
# Run a shell command inside a container, echoing it first so the CI log
# shows what was executed.
#   $1 - container name (printed in bold)
#   $2 - command string passed to `bash -c`
docker_exec() {
    # Echo to stderr
    >&2 echo -e "\033[1m$1\033[0m: $2"
    docker exec $1 /bin/bash -c "$2"
}
# Poll the container ($1) with pg_isready until PostgreSQL accepts
# connections: up to 10 attempts, 2 seconds apart. Exits the whole script
# with status 1 on timeout.
wait_for_pg() {
    set +e
    for i in {1..10}; do
        sleep 2
        docker_exec $1 "pg_isready -U postgres"
        if [[ $? == 0 ]] ; then
            # this makes the test less flaky, although not
            # ideal. Apparently, pg_isready is not always a good
            # indication of whether the DB is actually ready to accept
            # queries
            sleep 5
            set -e
            return 0
        fi
    done
    exit 1
}
# Remove any leftover container from a previous run; ignore failure if none.
docker rm -f timescaledb-memory 2>/dev/null || true

# Build the memory_test docker image.
IMAGE_NAME=memory_test TAG_NAME=latest bash ${SCRIPT_DIR}/docker-build.sh
# The odd contortion with the BASE_DIR is necessary since SCRIPT_DIR
# is relative and --volume requires an absolute path.
docker run --env TIMESCALEDB_TELEMETRY=off -d \
    --volume ${BASE_DIR}/scripts:/mnt/scripts \
    --name timescaledb-memory memory_test:latest
wait_for_pg timescaledb-memory

echo "**** Installing python3 and psutil ****"
docker_exec timescaledb-memory "apk add --no-cache python3 && python3 -m ensurepip && pip3 install --upgrade pip && apk add --update build-base python3-dev py-psutil"

echo "**** Testing ****"
# Start the memory monitor in the background, give it 5 seconds to find the
# backend process, then run the insert workload it watches.
docker_exec timescaledb-memory "python3 /mnt/scripts/test_memory_spikes.py & sleep 5 && psql -U postgres -d postgres -h localhost -v ECHO=all -X -f /mnt/scripts/out_of_order_random_direct.sql"

View File

@ -8,9 +8,13 @@
import psutil import psutil
import time import time
import sys import sys
from datetime import datetime
DEFAULT_MEMCAP = 300 # in MB DEFAULT_MEMCAP = 300 # in MB
THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike
WAIT_TO_STABILIZE = 30 # wait in seconds before considering memory stable
CHECK_INTERVAL = 15
DEBUG = False
# finds processes with name as argument # finds processes with name as argument
def find_procs_by_name(name): def find_procs_by_name(name):
@ -39,32 +43,33 @@ def bytes2human(n):
return "%sB" % n return "%sB" % n
# prints pid of processes # prints pid of processes
def print_pid(process): def process_details(process):
if not process: return "{} {}".format(process.pid, ''.join(process.cmdline()).strip())
return
for p in process: def process_stats():
print(p.pid, end=" ") processes = find_procs_by_name('postgres')
print() for p in processes:
return print(p, p.num_ctx_switches(), p.cpu_times(), p.memory_info(), flush=True)
# return process id of new postgres process created when running SQL file # return process id of new postgres process created when running SQL file
def find_new_process(): def find_new_process():
# get postgres processes that are running before insertion starts # get postgres processes that are running before insertion starts
base_process = find_procs_by_name('postgres') base_process = find_procs_by_name('postgres')
print('Processes running before inserts run: ') print('Processes running before inserts run: ')
print_pid(base_process) for p in base_process:
print(process_details(p))
process_count = len(base_process) process_count = len(base_process)
print("Waiting 30 seconds for process running inserts to start") print("Waiting {} seconds for process running inserts to start".format(WAIT_TO_STABILIZE), flush=True)
time.sleep(30) # wait 30 seconds to get process that runs the inserts time.sleep(WAIT_TO_STABILIZE) # wait 30 seconds to get process that runs the inserts
# continuously check for creation of new postgres process # continuously check for creation of new postgres process
timeout = time.time() + 60 timeout = time.time() + 60
while True: while True:
# prevent infinite loop # prevent infinite loop
if time.time() > timeout: if time.time() > timeout:
print('Timed out on finding new process, should force quit SQL inserts') print('Timed out on finding new process, should force quit SQL inserts', flush=True)
sys.exit(4) sys.exit(4)
process = find_procs_by_name('postgres') process = find_procs_by_name('postgres')
@ -77,10 +82,10 @@ def find_new_process():
# We assume that the backend is the first 'new' process to start, so it will have # We assume that the backend is the first 'new' process to start, so it will have
# the lower PID # the lower PID
for p in difference_set: for p in difference_set:
print('found process: {}'.format(p)) print('found process: {}'.format(process_details(p)))
if new_process is None or p.pid < new_process.pid: if new_process is None or p.pid < new_process.pid:
new_process = p new_process = p
print('new_process: {}'.format(new_process)) print('new_process: {}'.format(process_details(new_process)))
return new_process.pid return new_process.pid
time.sleep(1) time.sleep(1)
@ -91,8 +96,8 @@ def main():
print('*** Check this pid is the same as "pg_backend_pid" from SQL command ***') print('*** Check this pid is the same as "pg_backend_pid" from SQL command ***')
print('New process running random inserts:', pid) print('New process running random inserts:', pid)
print('Waiting 1 minute for memory consumption to stabilize') print('Waiting {} seconds for memory consumption to stabilize'.format(WAIT_TO_STABILIZE), flush=True)
time.sleep(60) time.sleep(WAIT_TO_STABILIZE)
# Calculate average memory consumption from 5 values over 15 seconds # Calculate average memory consumption from 5 values over 15 seconds
sum = 0 sum = 0
@ -100,7 +105,7 @@ def main():
sum += p.memory_info().rss sum += p.memory_info().rss
time.sleep(3) time.sleep(3)
avg = sum / 5 avg = sum / 5
print('Average memory consumption: ', bytes2human(avg)) print('Average memory consumption: ', bytes2human(avg), flush=True)
cap = int(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MEMCAP) * 1024 * 1024 cap = int(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MEMCAP) * 1024 * 1024
upper_threshold = min(cap, avg * THRESHOLD_RATIO) upper_threshold = min(cap, avg * THRESHOLD_RATIO)
@ -119,17 +124,20 @@ def main():
sys.exit(4) sys.exit(4)
rss = p.memory_info().rss rss = p.memory_info().rss
print('Memory used by process ' + str(p.pid) + ': ' + bytes2human(rss)) stamp = datetime.now().strftime("%H:%M:%S")
print('{} Memory used by process {}: {}'.format(stamp, p.pid, bytes2human(rss)), flush=True)
if DEBUG:
process_stats()
# exit with error if memory above threshold # exit with error if memory above threshold
if rss > upper_threshold: if rss > upper_threshold:
print('Memory consumption exceeded upper threshold') print('Memory consumption exceeded upper threshold')
print('Killing postgres process') print('Killing postgres process', flush=True)
p.kill() p.kill()
sys.exit(4) sys.exit(4)
time.sleep(30) time.sleep(CHECK_INTERVAL)
print('No memory errors detected with out of order random inserts') print('No memory errors detected with out of order random inserts', flush=True)
sys.exit(0) # success sys.exit(0) # success
if __name__ == '__main__': if __name__ == '__main__':