mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-16 18:43:18 +08:00
Improve memory tests
This patch implements various improvements to the memory tests. The memory tests are split off into a separate workflow so they can be run independently of the prerelease tests by pushing to the branch memory_test. The scripts are adjusted to no longer build docker images to run the tests, but instead run the tests directly. The python script that monitors memory usage was improved and now flushes after printing so that output appears consistently in CI. This patch also changes the memory test to run on PG 12, 13, and 14 instead of only 12.
This commit is contained in:
parent
b27c9cbd47
commit
a9e00497a2
16
.github/workflows/cron-tests.yaml
vendored
16
.github/workflows/cron-tests.yaml
vendored
@ -33,22 +33,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
./scripts/docker-run-abi-test.sh
|
./scripts/docker-run-abi-test.sh
|
||||||
|
|
||||||
memory_leak:
|
|
||||||
name: Memory leak on insert
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
env:
|
|
||||||
PG_VERSION: 12.8
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout TimescaleDB
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
|
|
||||||
- name: Memory test
|
|
||||||
run: |
|
|
||||||
./scripts/docker-run-memory-test.sh
|
|
||||||
|
|
||||||
backup_and_restore:
|
backup_and_restore:
|
||||||
name: Backup and restore
|
name: Backup and restore
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
65
.github/workflows/memory-tests.yaml
vendored
Normal file
65
.github/workflows/memory-tests.yaml
vendored
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
name: Memory tests
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
# run daily 20:00 on master branch
|
||||||
|
- cron: '0 20 * * *'
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- prerelease_test
|
||||||
|
- memory_test
|
||||||
|
jobs:
|
||||||
|
memory_leak:
|
||||||
|
name: Memory leak on insert PG${{ matrix.pg }}
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
pg: [12, 13, 14]
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Install Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install gnupg systemd-coredump gdb postgresql-common libkrb5-dev python3-psutil
|
||||||
|
yes | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install postgresql-${{ matrix.pg }} postgresql-server-dev-${{ matrix.pg }}
|
||||||
|
|
||||||
|
- name: Checkout TimescaleDB
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Build TimescaleDB
|
||||||
|
run: |
|
||||||
|
./bootstrap -DCMAKE_BUILD_TYPE=Release
|
||||||
|
make -C build
|
||||||
|
sudo make -C build install
|
||||||
|
|
||||||
|
- name: Setup database
|
||||||
|
run: |
|
||||||
|
sudo tee -a /etc/postgresql/${{ matrix.pg }}/main/postgresql.conf <<-CONF
|
||||||
|
shared_preload_libraries = 'timescaledb'
|
||||||
|
max_worker_processes = 0
|
||||||
|
log_destination = syslog
|
||||||
|
max_wal_size = 8GB
|
||||||
|
max_wal_senders = 0
|
||||||
|
wal_level = minimal
|
||||||
|
checkpoint_timeout = 20min
|
||||||
|
log_checkpoints = on
|
||||||
|
bgwriter_lru_maxpages = 0
|
||||||
|
track_counts = off
|
||||||
|
fsync = off
|
||||||
|
port = 5432
|
||||||
|
CONF
|
||||||
|
sudo grep port /etc/postgresql/${{ matrix.pg }}/main/postgresql.conf
|
||||||
|
sudo systemctl start postgresql@${{ matrix.pg }}-main.service
|
||||||
|
sudo -u postgres psql -X -c "CREATE USER runner SUPERUSER LOGIN;"
|
||||||
|
|
||||||
|
- name: Run memory test
|
||||||
|
run: |
|
||||||
|
sudo -u postgres python ./scripts/test_memory_spikes.py & sleep 5 && psql -d postgres -v ECHO=all -X -f scripts/out_of_order_random_direct.sql
|
||||||
|
|
||||||
|
- name: Postgres log
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
sudo journalctl -u postgresql@${{ matrix.pg }}-main.service
|
||||||
|
|
@ -1,80 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
set -e
|
|
||||||
set -o pipefail
|
|
||||||
|
|
||||||
SCRIPT_DIR=$(dirname $0)
|
|
||||||
BASE_DIR=${PWD}/${SCRIPT_DIR}/..
|
|
||||||
DO_CLEANUP=true
|
|
||||||
|
|
||||||
while getopts "d" opt;
|
|
||||||
do
|
|
||||||
case $opt in
|
|
||||||
d)
|
|
||||||
DO_CLEANUP=false
|
|
||||||
echo "!!Debug mode: Containers and temporary directory will be left on disk"
|
|
||||||
echo
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
shift $((OPTIND-1))
|
|
||||||
|
|
||||||
if "$DO_CLEANUP" = "true"; then
|
|
||||||
trap cleanup EXIT
|
|
||||||
fi
|
|
||||||
|
|
||||||
cleanup() {
|
|
||||||
# Save status here so that we can return the status of the last
|
|
||||||
# command in the script and not the last command of the cleanup
|
|
||||||
# function
|
|
||||||
status="$?"
|
|
||||||
set +e # do not exit immediately on failure in cleanup handler
|
|
||||||
# docker rm -vf timescaledb-valgrind 2>/dev/null
|
|
||||||
docker rm -vf timescaledb-memory 2>/dev/null
|
|
||||||
echo "Exit status is $status"
|
|
||||||
exit $status
|
|
||||||
}
|
|
||||||
|
|
||||||
docker_exec() {
|
|
||||||
# Echo to stderr
|
|
||||||
>&2 echo -e "\033[1m$1\033[0m: $2"
|
|
||||||
docker exec $1 /bin/bash -c "$2"
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_for_pg() {
|
|
||||||
set +e
|
|
||||||
for i in {1..10}; do
|
|
||||||
sleep 2
|
|
||||||
|
|
||||||
docker_exec $1 "pg_isready -U postgres"
|
|
||||||
|
|
||||||
if [[ $? == 0 ]] ; then
|
|
||||||
# this makes the test less flaky, although not
|
|
||||||
# ideal. Apperently, pg_isready is not always a good
|
|
||||||
# indication of whether the DB is actually ready to accept
|
|
||||||
# queries
|
|
||||||
sleep 5
|
|
||||||
set -e
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
docker rm -f timescaledb-memory 2>/dev/null || true
|
|
||||||
IMAGE_NAME=memory_test TAG_NAME=latest bash ${SCRIPT_DIR}/docker-build.sh
|
|
||||||
|
|
||||||
# The odd contortion with the BASE_DIR is necessary since SCRIPT_DIR
|
|
||||||
# is relative and --volume requires an absolute path.
|
|
||||||
docker run --env TIMESCALEDB_TELEMETRY=off -d \
|
|
||||||
--volume ${BASE_DIR}/scripts:/mnt/scripts \
|
|
||||||
--name timescaledb-memory memory_test:latest
|
|
||||||
wait_for_pg timescaledb-memory
|
|
||||||
|
|
||||||
echo "**** Installing python3 and psutil ****"
|
|
||||||
docker_exec timescaledb-memory "apk add --no-cache python3 && python3 -m ensurepip && pip3 install --upgrade pip && apk add --update build-base python3-dev py-psutil"
|
|
||||||
|
|
||||||
echo "**** Testing ****"
|
|
||||||
docker_exec timescaledb-memory "python3 /mnt/scripts/test_memory_spikes.py & sleep 5 && psql -U postgres -d postgres -h localhost -v ECHO=all -X -f /mnt/scripts/out_of_order_random_direct.sql"
|
|
@ -8,9 +8,13 @@
|
|||||||
import psutil
|
import psutil
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
DEFAULT_MEMCAP = 300 # in MB
|
DEFAULT_MEMCAP = 300 # in MB
|
||||||
THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike
|
THRESHOLD_RATIO = 1.5 # ratio above which considered memory spike
|
||||||
|
WAIT_TO_STABILIZE = 30 # wait in seconds before considering memory stable
|
||||||
|
CHECK_INTERVAL = 15
|
||||||
|
DEBUG = False
|
||||||
|
|
||||||
# finds processes with name as argument
|
# finds processes with name as argument
|
||||||
def find_procs_by_name(name):
|
def find_procs_by_name(name):
|
||||||
@ -39,32 +43,33 @@ def bytes2human(n):
|
|||||||
return "%sB" % n
|
return "%sB" % n
|
||||||
|
|
||||||
# prints pid of processes
|
# prints pid of processes
|
||||||
def print_pid(process):
|
def process_details(process):
|
||||||
if not process:
|
return "{} {}".format(process.pid, ''.join(process.cmdline()).strip())
|
||||||
return
|
|
||||||
for p in process:
|
def process_stats():
|
||||||
print(p.pid, end=" ")
|
processes = find_procs_by_name('postgres')
|
||||||
print()
|
for p in processes:
|
||||||
return
|
print(p, p.num_ctx_switches(), p.cpu_times(), p.memory_info(), flush=True)
|
||||||
|
|
||||||
# return process id of new postgres process created when running SQL file
|
# return process id of new postgres process created when running SQL file
|
||||||
def find_new_process():
|
def find_new_process():
|
||||||
# get postgres processes that are running before insertion starts
|
# get postgres processes that are running before insertion starts
|
||||||
base_process = find_procs_by_name('postgres')
|
base_process = find_procs_by_name('postgres')
|
||||||
print('Processes running before inserts run: ')
|
print('Processes running before inserts run: ')
|
||||||
print_pid(base_process)
|
for p in base_process:
|
||||||
|
print(process_details(p))
|
||||||
|
|
||||||
process_count = len(base_process)
|
process_count = len(base_process)
|
||||||
|
|
||||||
print("Waiting 30 seconds for process running inserts to start")
|
print("Waiting {} seconds for process running inserts to start".format(WAIT_TO_STABILIZE), flush=True)
|
||||||
time.sleep(30) # wait 30 seconds to get process that runs the inserts
|
time.sleep(WAIT_TO_STABILIZE) # wait 30 seconds to get process that runs the inserts
|
||||||
|
|
||||||
# continuously check for creation of new postgres process
|
# continuously check for creation of new postgres process
|
||||||
timeout = time.time() + 60
|
timeout = time.time() + 60
|
||||||
while True:
|
while True:
|
||||||
# prevent infinite loop
|
# prevent infinite loop
|
||||||
if time.time() > timeout:
|
if time.time() > timeout:
|
||||||
print('Timed out on finding new process, should force quit SQL inserts')
|
print('Timed out on finding new process, should force quit SQL inserts', flush=True)
|
||||||
sys.exit(4)
|
sys.exit(4)
|
||||||
|
|
||||||
process = find_procs_by_name('postgres')
|
process = find_procs_by_name('postgres')
|
||||||
@ -77,10 +82,10 @@ def find_new_process():
|
|||||||
# We assume that the backend is the first 'new' process to start, so it will have
|
# We assume that the backend is the first 'new' process to start, so it will have
|
||||||
# the lower PID
|
# the lower PID
|
||||||
for p in difference_set:
|
for p in difference_set:
|
||||||
print('found process: {}'.format(p))
|
print('found process: {}'.format(process_details(p)))
|
||||||
if new_process is None or p.pid < new_process.pid:
|
if new_process is None or p.pid < new_process.pid:
|
||||||
new_process = p
|
new_process = p
|
||||||
print('new_process: {}'.format(new_process))
|
print('new_process: {}'.format(process_details(new_process)))
|
||||||
return new_process.pid
|
return new_process.pid
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
@ -91,8 +96,8 @@ def main():
|
|||||||
print('*** Check this pid is the same as "pg_backend_pid" from SQL command ***')
|
print('*** Check this pid is the same as "pg_backend_pid" from SQL command ***')
|
||||||
print('New process running random inserts:', pid)
|
print('New process running random inserts:', pid)
|
||||||
|
|
||||||
print('Waiting 1 minute for memory consumption to stabilize')
|
print('Waiting {} seconds for memory consumption to stabilize'.format(WAIT_TO_STABILIZE), flush=True)
|
||||||
time.sleep(60)
|
time.sleep(WAIT_TO_STABILIZE)
|
||||||
|
|
||||||
# Calculate average memory consumption from 5 values over 15 seconds
|
# Calculate average memory consumption from 5 values over 15 seconds
|
||||||
sum = 0
|
sum = 0
|
||||||
@ -100,7 +105,7 @@ def main():
|
|||||||
sum += p.memory_info().rss
|
sum += p.memory_info().rss
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
avg = sum / 5
|
avg = sum / 5
|
||||||
print('Average memory consumption: ', bytes2human(avg))
|
print('Average memory consumption: ', bytes2human(avg), flush=True)
|
||||||
|
|
||||||
cap = int(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MEMCAP) * 1024 * 1024
|
cap = int(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MEMCAP) * 1024 * 1024
|
||||||
upper_threshold = min(cap, avg * THRESHOLD_RATIO)
|
upper_threshold = min(cap, avg * THRESHOLD_RATIO)
|
||||||
@ -119,17 +124,20 @@ def main():
|
|||||||
sys.exit(4)
|
sys.exit(4)
|
||||||
|
|
||||||
rss = p.memory_info().rss
|
rss = p.memory_info().rss
|
||||||
print('Memory used by process ' + str(p.pid) + ': ' + bytes2human(rss))
|
stamp = datetime.now().strftime("%H:%M:%S")
|
||||||
|
print('{} Memory used by process {}: {}'.format(stamp, p.pid, bytes2human(rss)), flush=True)
|
||||||
|
if DEBUG:
|
||||||
|
process_stats()
|
||||||
|
|
||||||
# exit with error if memory above threshold
|
# exit with error if memory above threshold
|
||||||
if rss > upper_threshold:
|
if rss > upper_threshold:
|
||||||
print('Memory consumption exceeded upper threshold')
|
print('Memory consumption exceeded upper threshold')
|
||||||
print('Killing postgres process')
|
print('Killing postgres process', flush=True)
|
||||||
p.kill()
|
p.kill()
|
||||||
sys.exit(4)
|
sys.exit(4)
|
||||||
time.sleep(30)
|
time.sleep(CHECK_INTERVAL)
|
||||||
|
|
||||||
print('No memory errors detected with out of order random inserts')
|
print('No memory errors detected with out of order random inserts', flush=True)
|
||||||
sys.exit(0) # success
|
sys.exit(0) # success
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user