Add a distributed COPY test with more data

Use data sets with 10k and 100k rows with a variety of partitioning
settings. This may help to catch some rare corner cases.
This commit is contained in:
Alexander Kuzmenkov 2022-07-21 15:53:15 +03:00 committed by Alexander Kuzmenkov
parent 296601b1d7
commit 1f6b0240a3
5 changed files with 211 additions and 0 deletions

View File

@ -0,0 +1,132 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- Test distributed COPY with a bigger data set to help find rare effects.
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\set DN_DBNAME_1 :TEST_DBNAME _1
\set DN_DBNAME_2 :TEST_DBNAME _2
\set DN_DBNAME_3 :TEST_DBNAME _3
SELECT 1 FROM add_data_node('data_node_1', host => 'localhost',
database => :'DN_DBNAME_1');
?column?
----------
1
(1 row)
SELECT 1 FROM add_data_node('data_node_2', host => 'localhost',
database => :'DN_DBNAME_2');
?column?
----------
1
(1 row)
SELECT 1 FROM add_data_node('data_node_3', host => 'localhost',
database => :'DN_DBNAME_3');
?column?
----------
1
(1 row)
GRANT USAGE ON FOREIGN SERVER data_node_1, data_node_2, data_node_3 TO PUBLIC;
SET ROLE :ROLE_1;
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
-- Aim to about 100 partitions, the data is from 1995 to 2022.
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day');
NOTICE: adding not-null constraint to column "date"
create_distributed_hypertable
-------------------------------
(1,public,uk_price_paid,t)
(1 row)
create table uk_price_paid_space2(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day');
WARNING: insufficient number of partitions for dimension "postcode2"
create_distributed_hypertable
-----------------------------------
(2,public,uk_price_paid_space2,t)
(1 row)
create table uk_price_paid_space10(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day');
create_distributed_hypertable
------------------------------------
(3,public,uk_price_paid_space10,t)
(1 row)
\copy uk_price_paid_space2 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space2;
count
-------
10000
(1 row)
\copy uk_price_paid_space2 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space2;
count
-------
20000
(1 row)
\copy uk_price_paid_space10 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space10;
count
-------
10000
(1 row)
\copy uk_price_paid_space10 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space10;
count
-------
20000
(1 row)
set timescaledb.max_open_chunks_per_insert = 1;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
count
--------
100000
(1 row)
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
count
--------
200000
(1 row)
truncate uk_price_paid;
set timescaledb.max_open_chunks_per_insert = 2;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
count
--------
100000
(1 row)
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
count
--------
200000
(1 row)
truncate uk_price_paid;
set timescaledb.max_open_chunks_per_insert = 1117;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
count
--------
100000
(1 row)
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
count
--------
200000
(1 row)
truncate uk_price_paid;
reset timescaledb.max_open_chunks_per_insert;

View File

@ -69,6 +69,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
dist_api_calls.sql
dist_commands.sql
dist_compression.sql
dist_copy_long.sql
dist_ddl.sql
dist_partial_agg.sql
dist_policy.sql

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,78 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- Test distributed COPY with a bigger data set to help find rare effects.
-- NOTE(review): this is a pg_regress-style test source. psql echoes the input
-- into the expected-output (.out) file, so any edit here — including comments —
-- must be mirrored in the corresponding .out file or the test will fail.
-- Connect as the cluster superuser so we can create data nodes.
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
-- Each data node lives in its own database, named after the test database.
\set DN_DBNAME_1 :TEST_DBNAME _1
\set DN_DBNAME_2 :TEST_DBNAME _2
\set DN_DBNAME_3 :TEST_DBNAME _3
-- Set up a three-node cluster. SELECT 1 keeps the echoed output stable
-- regardless of the full add_data_node() result row.
SELECT 1 FROM add_data_node('data_node_1', host => 'localhost',
database => :'DN_DBNAME_1');
SELECT 1 FROM add_data_node('data_node_2', host => 'localhost',
database => :'DN_DBNAME_2');
SELECT 1 FROM add_data_node('data_node_3', host => 'localhost',
database => :'DN_DBNAME_3');
GRANT USAGE ON FOREIGN SERVER data_node_1, data_node_2, data_node_3 TO PUBLIC;
-- The rest of the test runs as an unprivileged role.
SET ROLE :ROLE_1;
-- Schema matches the UK property price-paid data set shipped with the test
-- (data/prices-*-random-1.tsv.gz).
create table uk_price_paid(price integer, "date" date, postcode1 text, postcode2 text, type smallint, is_new bool, duration smallint, addr1 text, addr2 text, street text, locality text, town text, district text, country text, category smallint);
-- Aim to about 100 partitions, the data is from 1995 to 2022.
-- Time-partitioned only (no space dimension).
select create_distributed_hypertable('uk_price_paid', 'date', chunk_time_interval => interval '90 day');
-- Same schema with 2 space partitions on postcode2 (expected to warn:
-- fewer partitions than data nodes).
create table uk_price_paid_space2(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space2', 'date', 'postcode2', 2, chunk_time_interval => interval '90 day');
-- And with 10 space partitions.
create table uk_price_paid_space10(like uk_price_paid);
select create_distributed_hypertable('uk_price_paid_space10', 'date', 'postcode2', 10, chunk_time_interval => interval '90 day');
-- Load the 10k-row set twice into each space-partitioned table; the second
-- COPY exercises insertion into already-existing chunks. The counts
-- (10000, then 20000) verify no rows were lost or duplicated.
\copy uk_price_paid_space2 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space2;
\copy uk_price_paid_space2 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space2;
\copy uk_price_paid_space10 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space10;
\copy uk_price_paid_space10 from program 'zcat data/prices-10k-random-1.tsv.gz';
select count(*) from uk_price_paid_space10;
-- Now the 100k-row set against the time-only table, varying how many chunk
-- insert states may be kept open at once. 1 forces constant chunk switching
-- (worst case for the distributed COPY buffering code).
set timescaledb.max_open_chunks_per_insert = 1;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
truncate uk_price_paid;
-- 2 open chunks: still heavy eviction with randomly ordered dates.
set timescaledb.max_open_chunks_per_insert = 2;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
truncate uk_price_paid;
-- A value larger than the ~100 expected chunks, so nothing is evicted.
-- 1117 is arbitrary; presumably chosen as an odd non-round number — confirm.
set timescaledb.max_open_chunks_per_insert = 1117;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
\copy uk_price_paid from program 'zcat data/prices-100k-random-1.tsv.gz';
select count(*) from uk_price_paid;
truncate uk_price_paid;
reset timescaledb.max_open_chunks_per_insert;