#!/bin/bash
#
# This script inserts a large CSV file by splitting it into smaller
# batches. We do this so that we do not over-fill TimescaleDB's chunks:
# the chunk mechanism currently does not close a chunk mid-insert, even
# if the insert would over-fill the chunk.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "Usage: $0 csv_file db_name table_name"
    exit 1
fi

# To avoid password prompts, add localhost:5432:*:postgres:test to ~/.pgpass
set -u
set -e

export PGUSER=${PGUSER:-postgres}
export PGHOST=${PGHOST:-localhost}

# Remove any previous split files
rm -f .timescaledb_temp_*

echo "Splitting CSV into batches of 500,000 rows..."
split -l 500000 "$1" .timescaledb_temp_
echo "[OK]"

echo "Importing data..."
for f in .timescaledb_temp_*; do
    # Build the \COPY command for this batch and run it; ON_ERROR_STOP
    # makes psql abort (and the script exit, via set -e) on any failure.
    tempstr="\COPY \"$3\" FROM '$f' CSV"
    psql -v ON_ERROR_STOP=1 -X -d "$2" -c "$tempstr"
done
echo "[OK]"

echo "Cleaning up..."
rm -f .timescaledb_temp_*
echo "[OK]"
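
# ----------------------------------------------------------------------
# Example usage (a sketch; the script filename, database name, table
# name, and CSV file below are hypothetical, and the target table is
# assumed to already exist as a TimescaleDB hypertable):
#
#   # One-time setup in psql against the target database, e.g.:
#   #   CREATE TABLE conditions(time timestamptz, device text, temp float);
#   #   SELECT create_hypertable('conditions', 'time');
#
#   ./import_csv.sh conditions.csv mydb conditions
# ----------------------------------------------------------------------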