Mirror of https://github.com/timescale/timescaledb.git, synced 2025-05-17 19:13:16 +08:00
Currently TimescaleDB does not close chunks mid-insert, so a single large batch insert will over-fill a chunk. This commit adds a script that splits large CSV files into smaller batches so that chunks can be closed at reasonable sizes during import.
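For example, assuming the script is saved as split_csv_insert.sh (that name, along with the CSV file, database, and table names below, is a placeholder, since the file's path is not shown here), a batched import would be started with:

    ./split_csv_insert.sh weather_readings.csv weatherdb conditions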
36 lines
933 B
Bash
Executable File
#!/bin/bash
# This script allows one to insert a large CSV file by splitting it into
# smaller batches. We do this in order to not bypass TimescaleDB's
# chunk mechanism, which currently does not close a chunk mid-insert even
# if the insert would over-fill the chunk.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "Usage: $0 csv_file db_name table_name"
    exit 1
fi

# To avoid having to type the password, add localhost:5432:*:postgres:test
# to ~/.pgpass (format: hostname:port:database:username:password)
set -u
set -e

export PGUSER=${PGUSER:-postgres}
export PGHOST=${PGHOST:-localhost}

# Remove any previous split files
rm -f .timescaledb_temp_*

echo "Splitting CSV into batches of 500,000 rows..."
split -l 500000 "$1" .timescaledb_temp_
echo "[OK]"

echo "Importing data..."
|
|
for f in .timescaledb_temp_*; do
|
|
tempstr="\COPY \"$3\" FROM $f CSV"
|
|
psql -v ON_ERROR_STOP=1 -X -d $2 -c ''"$tempstr"''
|
|
done
|
|
echo "[OK]"
|
|
|
|
echo "Cleaning up..."
|
|
rm -f .timescaledb_temp_*
|
|
echo "[OK]"
|
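Because the script only defaults PGUSER and PGHOST when they are unset, the connection target can be changed per run through those environment variables. A sketch of such an invocation, where every name below is a placeholder:

    PGUSER=ingest PGHOST=db.example.com ./split_csv_insert.sh weather_readings.csv weatherdb conditions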