timescaledb/scripts/migrate_data.sh
Rob Kiefer 55736b9262 Add a migrate data script for large datasets
Currently TimescaleDB does not close chunks mid-insert, so large
batches will overfill a chunk. This commit adds a script to
split large CSV files into smaller batches to allow reasonable
closing of chunks.
2017-03-10 11:41:11 -05:00

#!/bin/bash
# This script inserts a large CSV file by splitting it into smaller batches.
# We do this so as not to defeat TimescaleDB's chunking mechanism, which
# currently does not close a chunk mid-insert even if the insert would
# over-fill the chunk.
if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "Usage: $0 csv_file db_name table_name"
    exit 1
fi
# To avoid a password prompt, add a line like localhost:5432:*:postgres:test to ~/.pgpass
set -u
set -e
export PGUSER=${PGUSER:-postgres}
export PGHOST=${PGHOST:-localhost}
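# PGUSER/PGHOST may be overridden in the environment; the defaults above
# assume a local server and the postgres superuser.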
# Remove any previous split files
rm -f .timescaledb_temp_*
echo "Splitting CSV into batches of 500,000 rows..."
split -l 500000 "$1" .timescaledb_temp_
echo "[OK]"
echo "Importing data..."
for f in .timescaledb_temp_*; do
    tempstr="\COPY \"$3\" FROM '$f' CSV"
    psql -v ON_ERROR_STOP=1 -X -d "$2" -c "$tempstr"
done
echo "[OK]"
echo "Cleaning up..."
rm -f .timescaledb_temp_*
echo "[OK]"