From 080011d767a6e61d09dc83b4d58f8be7dc9ecb31 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 18 Oct 2022 20:26:45 +0400 Subject: [PATCH] Speed up the dist_copy tests In some cases we can use fewer chunks, less data, and not truncate tables. --- tsl/test/expected/dist_copy_format_long.out | 56 +++++++++++---------- tsl/test/expected/dist_copy_long.out | 6 +-- tsl/test/sql/dist_copy_format_long.sql | 27 +++++----- tsl/test/sql/dist_copy_long.sql | 6 +-- 4 files changed, 49 insertions(+), 46 deletions(-) diff --git a/tsl/test/expected/dist_copy_format_long.out b/tsl/test/expected/dist_copy_format_long.out index 531a0a161..12be237ff 100644 --- a/tsl/test/expected/dist_copy_format_long.out +++ b/tsl/test/expected/dist_copy_format_long.out @@ -33,7 +33,7 @@ SET ROLE :ROLE_1; -- Aim to about 100 partitions, the data is from 1995 to 2022. create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2', - chunk_time_interval => interval '90 day'); + chunk_time_interval => interval '270 day'); NOTICE: adding not-null constraint to column "date" create_distributed_hypertable ------------------------------- @@ -41,15 +41,21 @@ NOTICE: adding not-null constraint to column "date" (1 row) -- Populate. -\copy uk_price_paid from program 'zcat < data/prices-100k-random-1.tsv.gz'; -select count(*), sum(price) from uk_price_paid; - count | sum ---------+------------- - 100000 | 20759547354 +\copy uk_price_paid from program 'zcat < data/prices-10k-random-1.tsv.gz'; +select count(*), sum(price), sum(price) / count(*) from uk_price_paid; + count | sum | ?column? 
+-------+------------+---------- + 10000 | 2055688013 | 205568 +(1 row) + +select count(*) from show_chunks('uk_price_paid'); + count +------- + 114 (1 row) -- Make binary file. -\copy (select * from uk_price_paid) to 'prices-100k.pgbinary' with (format binary); +\copy (select * from uk_price_paid) to 'prices-10k.pgbinary' with (format binary); -- Binary input with binary data transfer. set timescaledb.enable_connection_binary_data = true; set timescaledb.dist_copy_transfer_format = 'binary'; @@ -61,42 +67,40 @@ select create_distributed_hypertable('uk_price_paid_bin', 'date', 'postcode2', (2,public,uk_price_paid_bin,t) (1 row) -\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); -select count(*), sum(price) from uk_price_paid_bin; - count | sum ---------+------------- - 100000 | 20759547354 +\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary); +select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin; + count | sum | ?column? +-------+------------+---------- + 10000 | 2055688013 | 205568 (1 row) -- Text input with explicit format option and binary data transfer. -truncate uk_price_paid_bin; -\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz' with (format text); -select count(*), sum(price) from uk_price_paid_bin; - count | sum ---------+------------- - 100000 | 20759547354 +\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz' with (format text); +select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin; + count | sum | ?column? +-------+------------+---------- + 20000 | 4111376026 | 205568 (1 row) -- Binary input with text data transfer. Doesn't work. 
set timescaledb.dist_copy_transfer_format = 'text'; \set ON_ERROR_STOP off -\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); +\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary); ERROR: remote copy does not support binary input in combination with text transfer to data nodes \set ON_ERROR_STOP on -- Text input with text data transfer. -truncate uk_price_paid_bin; -\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz'; -select count(*), sum(price) from uk_price_paid_bin; - count | sum ---------+------------- - 100000 | 20759547354 +\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz'; +select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin; + count | sum | ?column? +-------+------------+---------- + 30000 | 6167064039 | 205568 (1 row) -- Nonsensical settings set timescaledb.dist_copy_transfer_format = 'binary'; set timescaledb.enable_connection_binary_data = false; \set ON_ERROR_STOP off -\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); +\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary); ERROR: the requested binary format for COPY data transfer is disabled by the settings \set ON_ERROR_STOP on -- Teardown diff --git a/tsl/test/expected/dist_copy_long.out b/tsl/test/expected/dist_copy_long.out index 3c442604e..7df654776 100644 --- a/tsl/test/expected/dist_copy_long.out +++ b/tsl/test/expected/dist_copy_long.out @@ -31,7 +31,7 @@ GRANT USAGE ON FOREIGN SERVER data_node_1, data_node_2, data_node_3 TO PUBLIC; SET ROLE :ROLE_1; create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); -- Aim to about 100 partitions, the data is from 1995 to 2022. 
-select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day'); +select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '270 day'); NOTICE: adding not-null constraint to column "date" create_distributed_hypertable ------------------------------- @@ -39,7 +39,7 @@ NOTICE: adding not-null constraint to column "date" (1 row) create table uk_price_paid_space2(like uk_price_paid); -select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day'); +select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '270 day'); WARNING: insufficient number of partitions for dimension "postcode2" create_distributed_hypertable ----------------------------------- @@ -47,7 +47,7 @@ WARNING: insufficient number of partitions for dimension "postcode2" (1 row) create table uk_price_paid_space10(like uk_price_paid); -select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day'); +select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '270 day'); create_distributed_hypertable ------------------------------------ (3,public,uk_price_paid_space10,t) diff --git a/tsl/test/sql/dist_copy_format_long.sql b/tsl/test/sql/dist_copy_format_long.sql index 7ae8de97a..217a3383c 100644 --- a/tsl/test/sql/dist_copy_format_long.sql +++ b/tsl/test/sql/dist_copy_format_long.sql @@ -25,14 +25,15 @@ SET ROLE :ROLE_1; -- Aim to about 100 partitions, the data is from 1995 to 2022. 
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2', - chunk_time_interval => interval '90 day'); + chunk_time_interval => interval '270 day'); -- Populate. -\copy uk_price_paid from program 'zcat < data/prices-100k-random-1.tsv.gz'; -select count(*), sum(price) from uk_price_paid; +\copy uk_price_paid from program 'zcat < data/prices-10k-random-1.tsv.gz'; +select count(*), sum(price), sum(price) / count(*) from uk_price_paid; +select count(*) from show_chunks('uk_price_paid'); -- Make binary file. -\copy (select * from uk_price_paid) to 'prices-100k.pgbinary' with (format binary); +\copy (select * from uk_price_paid) to 'prices-10k.pgbinary' with (format binary); -- Binary input with binary data transfer. set timescaledb.enable_connection_binary_data = true; @@ -41,30 +42,28 @@ create table uk_price_paid_bin(like uk_price_paid); select create_distributed_hypertable('uk_price_paid_bin', 'date', 'postcode2', chunk_time_interval => interval '90 day', replication_factor => 2); -\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); -select count(*), sum(price) from uk_price_paid_bin; +\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary); +select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin; -- Text input with explicit format option and binary data transfer. 
-truncate uk_price_paid_bin; -\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz' with (format text); -select count(*), sum(price) from uk_price_paid_bin; +\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz' with (format text); +select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin; -- Binary input with text data transfer. Doesn't work. set timescaledb.dist_copy_transfer_format = 'text'; \set ON_ERROR_STOP off -\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); +\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary); \set ON_ERROR_STOP on -- Text input with text data transfer. -truncate uk_price_paid_bin; -\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz'; -select count(*), sum(price) from uk_price_paid_bin; +\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz'; +select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin; -- Nonsensical settings set timescaledb.dist_copy_transfer_format = 'binary'; set timescaledb.enable_connection_binary_data = false; \set ON_ERROR_STOP off -\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); +\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary); \set ON_ERROR_STOP on -- Teardown diff --git a/tsl/test/sql/dist_copy_long.sql b/tsl/test/sql/dist_copy_long.sql index 69a888ec8..4b89ab6b2 100644 --- a/tsl/test/sql/dist_copy_long.sql +++ b/tsl/test/sql/dist_copy_long.sql @@ -23,13 +23,13 @@ SET ROLE :ROLE_1; create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); -- Aim to about 100 partitions, the data is from 1995 to 2022. 
-select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day'); +select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '270 day'); create table uk_price_paid_space2(like uk_price_paid); -select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day'); +select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '270 day'); create table uk_price_paid_space10(like uk_price_paid); -select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day'); +select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '270 day'); \copy uk_price_paid_space2 from program 'zcat < data/prices-10k-random-1.tsv.gz';