Speed up the dist_copy tests

In some cases we can use fewer chunks, less data, and avoid truncating
tables.
This commit is contained in:
Alexander Kuzmenkov 2022-10-18 20:26:45 +04:00 committed by Alexander Kuzmenkov
parent 702ac53c0a
commit 080011d767
4 changed files with 49 additions and 46 deletions

View File

@ -33,7 +33,7 @@ SET ROLE :ROLE_1;
-- Aim to about 100 partitions, the data is from 1995 to 2022.
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2',
chunk_time_interval => interval '90 day');
chunk_time_interval => interval '270 day');
NOTICE: adding not-null constraint to column "date"
create_distributed_hypertable
-------------------------------
@ -41,15 +41,21 @@ NOTICE: adding not-null constraint to column "date"
(1 row)
-- Populate.
\copy uk_price_paid from program 'zcat < data/prices-100k-random-1.tsv.gz';
select count(*), sum(price) from uk_price_paid;
count | sum
--------+-------------
100000 | 20759547354
\copy uk_price_paid from program 'zcat < data/prices-10k-random-1.tsv.gz';
select count(*), sum(price), sum(price) / count(*) from uk_price_paid;
count | sum | ?column?
-------+------------+----------
10000 | 2055688013 | 205568
(1 row)
select count(*) from show_chunks('uk_price_paid');
count
-------
114
(1 row)
-- Make binary file.
\copy (select * from uk_price_paid) to 'prices-100k.pgbinary' with (format binary);
\copy (select * from uk_price_paid) to 'prices-10k.pgbinary' with (format binary);
-- Binary input with binary data transfer.
set timescaledb.enable_connection_binary_data = true;
set timescaledb.dist_copy_transfer_format = 'binary';
@ -61,42 +67,40 @@ select create_distributed_hypertable('uk_price_paid_bin', 'date', 'postcode2',
(2,public,uk_price_paid_bin,t)
(1 row)
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary);
select count(*), sum(price) from uk_price_paid_bin;
count | sum
--------+-------------
100000 | 20759547354
\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
count | sum | ?column?
-------+------------+----------
10000 | 2055688013 | 205568
(1 row)
-- Text input with explicit format option and binary data transfer.
truncate uk_price_paid_bin;
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz' with (format text);
select count(*), sum(price) from uk_price_paid_bin;
count | sum
--------+-------------
100000 | 20759547354
\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz' with (format text);
select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
count | sum | ?column?
-------+------------+----------
20000 | 4111376026 | 205568
(1 row)
-- Binary input with text data transfer. Doesn't work.
set timescaledb.dist_copy_transfer_format = 'text';
\set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary);
\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
ERROR: remote copy does not support binary input in combination with text transfer to data nodes
\set ON_ERROR_STOP on
-- Text input with text data transfer.
truncate uk_price_paid_bin;
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz';
select count(*), sum(price) from uk_price_paid_bin;
count | sum
--------+-------------
100000 | 20759547354
\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz';
select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
count | sum | ?column?
-------+------------+----------
30000 | 6167064039 | 205568
(1 row)
-- Nonsensical settings
set timescaledb.dist_copy_transfer_format = 'binary';
set timescaledb.enable_connection_binary_data = false;
\set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary);
\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
ERROR: the requested binary format for COPY data transfer is disabled by the settings
\set ON_ERROR_STOP on
-- Teardown

View File

@ -31,7 +31,7 @@ GRANT USAGE ON FOREIGN SERVER data_node_1, data_node_2, data_node_3 TO PUBLIC;
SET ROLE :ROLE_1;
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
-- Aim to about 100 partitions, the data is from 1995 to 2022.
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day');
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '270 day');
NOTICE: adding not-null constraint to column "date"
create_distributed_hypertable
-------------------------------
@ -39,7 +39,7 @@ NOTICE: adding not-null constraint to column "date"
(1 row)
create table uk_price_paid_space2(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day');
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '270 day');
WARNING: insufficient number of partitions for dimension "postcode2"
create_distributed_hypertable
-----------------------------------
@ -47,7 +47,7 @@ WARNING: insufficient number of partitions for dimension "postcode2"
(1 row)
create table uk_price_paid_space10(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day');
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '270 day');
create_distributed_hypertable
------------------------------------
(3,public,uk_price_paid_space10,t)

View File

@ -25,14 +25,15 @@ SET ROLE :ROLE_1;
-- Aim to about 100 partitions, the data is from 1995 to 2022.
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
select create_distributed_hypertable('uk_price_paid', 'date', 'postcode2',
chunk_time_interval => interval '90 day');
chunk_time_interval => interval '270 day');
-- Populate.
\copy uk_price_paid from program 'zcat < data/prices-100k-random-1.tsv.gz';
select count(*), sum(price) from uk_price_paid;
\copy uk_price_paid from program 'zcat < data/prices-10k-random-1.tsv.gz';
select count(*), sum(price), sum(price) / count(*) from uk_price_paid;
select count(*) from show_chunks('uk_price_paid');
-- Make binary file.
\copy (select * from uk_price_paid) to 'prices-100k.pgbinary' with (format binary);
\copy (select * from uk_price_paid) to 'prices-10k.pgbinary' with (format binary);
-- Binary input with binary data transfer.
set timescaledb.enable_connection_binary_data = true;
@ -41,30 +42,28 @@ create table uk_price_paid_bin(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_bin', 'date', 'postcode2',
chunk_time_interval => interval '90 day', replication_factor => 2);
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary);
select count(*), sum(price) from uk_price_paid_bin;
\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
-- Text input with explicit format option and binary data transfer.
truncate uk_price_paid_bin;
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz' with (format text);
select count(*), sum(price) from uk_price_paid_bin;
\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz' with (format text);
select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
-- Binary input with text data transfer. Doesn't work.
set timescaledb.dist_copy_transfer_format = 'text';
\set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary);
\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
\set ON_ERROR_STOP on
-- Text input with text data transfer.
truncate uk_price_paid_bin;
\copy uk_price_paid_bin from program 'zcat < data/prices-100k-random-1.tsv.gz';
select count(*), sum(price) from uk_price_paid_bin;
\copy uk_price_paid_bin from program 'zcat < data/prices-10k-random-1.tsv.gz';
select count(*), sum(price), sum(price) / count(*) from uk_price_paid_bin;
-- Nonsensical settings
set timescaledb.dist_copy_transfer_format = 'binary';
set timescaledb.enable_connection_binary_data = false;
\set ON_ERROR_STOP off
\copy uk_price_paid_bin from 'prices-100k.pgbinary' with (format binary);
\copy uk_price_paid_bin from 'prices-10k.pgbinary' with (format binary);
\set ON_ERROR_STOP on
-- Teardown

View File

@ -23,13 +23,13 @@ SET ROLE :ROLE_1;
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
-- Aim to about 100 partitions, the data is from 1995 to 2022.
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day');
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '270 day');
create table uk_price_paid_space2(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day');
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '270 day');
create table uk_price_paid_space10(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day');
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '270 day');
\copy uk_price_paid_space2 from program 'zcat < data/prices-10k-random-1.tsv.gz';