Speed up the dist_copy tests

In some cases we can use fewer chunks and less data, and avoid truncating
tables.
This commit is contained in:
Alexander Kuzmenkov 2022-10-18 20:26:45 +04:00 committed by Alexander Kuzmenkov
parent 702ac53c0a
commit 080011d767
4 changed files with 49 additions and 46 deletions

View File

@ -33,7 +33,7 @@ SET ROLE :ROLE_1;
-- Aim to about 100 partitions, the data is from 1995 to 2022. -- Aim to about 100 partitions, the data is from 1995 to 2022.
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2', select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2',
chunk_time_interval => interval '90 day'); chunk_time_interval => interval '270 day');
NOTICE: adding not-null constraint to column "date" NOTICE: adding not-null constraint to column "date"
create_distributed_hypertable create_distributed_hypertable
------------------------------- -------------------------------
@ -41,15 +41,21 @@ NOTICE: adding not-null constraint to column "date"
(1 row) (1 row)
-- Populate. -- Populate.
\copy uk_price_paid from program 'zcat < data/prices-100k-random-1.tsv.gz'; \copy uk_price_paid from program 'zcat < data/prices-10k-random-1.tsv.gz';
select count(*), sum(price) from uk_price_paid; select count(*), sum(price), sum(price) / count(*) from uk_price_paid;
count | sum count | sum | ?column?
--------+------------- -------+------------+----------
100000 | 20759547354 10000 | 2055688013 | 205568
(1 row)
select count(*) from show_chunks('uk_price_paid');
count
-------
114
(1 row) (1 row)
-- Make binary file. -- Make binary file.
\copy (select * from uk_price_paid) to 'prices-100k.pgbinary' with (format binary); \copy (select * from uk_price_paid) to 'prices-10k.pgbinary' with (format binary);
-- Binary input with binary data transfer. -- Binary input with binary data transfer.
set timescaledb.enable_connection_binary_data = true; set timescaledb.enable_connection_binary_data = true;
set timescaledb.dist_copy_transfer_format = 'binary'; set timescaledb.dist_copy_transfer_format = 'binary';
@ -61,42 +67,40 @@ select create_distributed_hypertable('uk_price_paid_bin', 'date', 'postcode2',
(2,public,uk_price_paid_bin,t) (2,public,uk_price_paid_bin,t)
(1 row) (1 row)
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); \copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
select count(*), sum(price) from uk_price_paid_bin; select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
count | sum count | sum | ?column?
--------+------------- -------+------------+----------
100000 | 20759547354 10000 | 2055688013 | 205568
(1 row) (1 row)
-- Text input with explicit format option and binary data transfer. -- Text input with explicit format option and binary data transfer.
truncate uk_price_paid_bin; \copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz' with (format text);
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz' with (format text); select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
select count(*), sum(price) from uk_price_paid_bin; count | sum | ?column?
count | sum -------+------------+----------
--------+------------- 20000 | 4111376026 | 205568
100000 | 20759547354
(1 row) (1 row)
-- Binary input with text data transfer. Doesn't work. -- Binary input with text data transfer. Doesn't work.
set timescaledb.dist_copy_transfer_format = 'text'; set timescaledb.dist_copy_transfer_format = 'text';
\set ON_ERROR_STOP off \set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); \copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
ERROR: remote copy does not support binary input in combination with text transfer to data nodes ERROR: remote copy does not support binary input in combination with text transfer to data nodes
\set ON_ERROR_STOP on \set ON_ERROR_STOP on
-- Text input with text data transfer. -- Text input with text data transfer.
truncate uk_price_paid_bin; \copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz';
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz'; select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
select count(*), sum(price) from uk_price_paid_bin; count | sum | ?column?
count | sum -------+------------+----------
--------+------------- 30000 | 6167064039 | 205568
100000 | 20759547354
(1 row) (1 row)
-- Nonsensical settings -- Nonsensical settings
set timescaledb.dist_copy_transfer_format = 'binary'; set timescaledb.dist_copy_transfer_format = 'binary';
set timescaledb.enable_connection_binary_data = false; set timescaledb.enable_connection_binary_data = false;
\set ON_ERROR_STOP off \set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); \copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
ERROR: the requested binary format for COPY data transfer is disabled by the settings ERROR: the requested binary format for COPY data transfer is disabled by the settings
\set ON_ERROR_STOP on \set ON_ERROR_STOP on
-- Teardown -- Teardown

View File

@ -31,7 +31,7 @@ GRANT USAGE ON FOREIGN SERVER data_node_1, data_node_2, data_node_3 TO PUBLIC;
SET ROLE :ROLE_1; SET ROLE :ROLE_1;
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
-- Aim to about 100 partitions, the data is from 1995 to 2022. -- Aim to about 100 partitions, the data is from 1995 to 2022.
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day'); select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '270 day');
NOTICE: adding not-null constraint to column "date" NOTICE: adding not-null constraint to column "date"
create_distributed_hypertable create_distributed_hypertable
------------------------------- -------------------------------
@ -39,7 +39,7 @@ NOTICE: adding not-null constraint to column "date"
(1 row) (1 row)
create table uk_price_paid_space2(like uk_price_paid); create table uk_price_paid_space2(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day'); select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '270 day');
WARNING: insufficient number of partitions for dimension "postcode2" WARNING: insufficient number of partitions for dimension "postcode2"
create_distributed_hypertable create_distributed_hypertable
----------------------------------- -----------------------------------
@ -47,7 +47,7 @@ WARNING: insufficient number of partitions for dimension "postcode2"
(1 row) (1 row)
create table uk_price_paid_space10(like uk_price_paid); create table uk_price_paid_space10(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day'); select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '270 day');
create_distributed_hypertable create_distributed_hypertable
------------------------------------ ------------------------------------
(3,public,uk_price_paid_space10,t) (3,public,uk_price_paid_space10,t)

View File

@ -25,14 +25,15 @@ SET ROLE :ROLE_1;
-- Aim to about 100 partitions, the data is from 1995 to 2022. -- Aim to about 100 partitions, the data is from 1995 to 2022.
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2', select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2',
chunk_time_interval => interval '90 day'); chunk_time_interval => interval '270 day');
-- Populate. -- Populate.
\copy uk_price_paid from program 'zcat < data/prices-100k-random-1.tsv.gz'; \copy uk_price_paid from program 'zcat < data/prices-10k-random-1.tsv.gz';
select count(*), sum(price) from uk_price_paid; select count(*), sum(price), sum(price) / count(*) from uk_price_paid;
select count(*) from show_chunks('uk_price_paid');
-- Make binary file. -- Make binary file.
\copy (select * from uk_price_paid) to 'prices-100k.pgbinary' with (format binary); \copy (select * from uk_price_paid) to 'prices-10k.pgbinary' with (format binary);
-- Binary input with binary data transfer. -- Binary input with binary data transfer.
set timescaledb.enable_connection_binary_data = true; set timescaledb.enable_connection_binary_data = true;
@ -41,30 +42,28 @@ create table uk_price_paid_bin(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_bin', 'date', 'postcode2', select create_distributed_hypertable('uk_price_paid_bin', 'date', 'postcode2',
chunk_time_interval => interval '90 day', replication_factor => 2); chunk_time_interval => interval '90 day', replication_factor => 2);
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); \copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
select count(*), sum(price) from uk_price_paid_bin; select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
-- Text input with explicit format option and binary data transfer. -- Text input with explicit format option and binary data transfer.
truncate uk_price_paid_bin; \copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz' with (format text);
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz' with (format text); select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
select count(*), sum(price) from uk_price_paid_bin;
-- Binary input with text data transfer. Doesn't work. -- Binary input with text data transfer. Doesn't work.
set timescaledb.dist_copy_transfer_format = 'text'; set timescaledb.dist_copy_transfer_format = 'text';
\set ON_ERROR_STOP off \set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); \copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
\set ON_ERROR_STOP on \set ON_ERROR_STOP on
-- Text input with text data transfer. -- Text input with text data transfer.
truncate uk_price_paid_bin; \copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz';
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz'; select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
select count(*), sum(price) from uk_price_paid_bin;
-- Nonsensical settings -- Nonsensical settings
set timescaledb.dist_copy_transfer_format = 'binary'; set timescaledb.dist_copy_transfer_format = 'binary';
set timescaledb.enable_connection_binary_data = false; set timescaledb.enable_connection_binary_data = false;
\set ON_ERROR_STOP off \set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary); \copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
\set ON_ERROR_STOP on \set ON_ERROR_STOP on
-- Teardown -- Teardown

View File

@ -23,13 +23,13 @@ SET ROLE :ROLE_1;
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint); create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
-- Aim to about 100 partitions, the data is from 1995 to 2022. -- Aim to about 100 partitions, the data is from 1995 to 2022.
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day'); select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '270 day');
create table uk_price_paid_space2(like uk_price_paid); create table uk_price_paid_space2(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day'); select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '270 day');
create table uk_price_paid_space10(like uk_price_paid); create table uk_price_paid_space10(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day'); select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '270 day');
\copy uk_price_paid_space2 from program 'zcat < data/prices-10k-random-1.tsv.gz'; \copy uk_price_paid_space2 from program 'zcat < data/prices-10k-random-1.tsv.gz';