timescaledb/tsl/test/sql/transparent_decompression_ordered_index.sql.in

-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.

\set TEST_BASE_NAME transparent_decompression_ordered_index
SELECT format('include/%s_query.sql', :'TEST_BASE_NAME') AS "TEST_QUERY_NAME",
    format('%s/results/%s_results_uncompressed.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_UNCOMPRESSED",
    format('%s/results/%s_results_compressed.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_COMPRESSED" \gset

SELECT format('\! diff %s %s', :'TEST_RESULTS_UNCOMPRESSED', :'TEST_RESULTS_COMPRESSED') AS "DIFF_CMD" \gset

-- disable memoize node to make EXPLAIN output comparable between PG14 and previous versions
SELECT CASE WHEN current_setting('server_version_num')::int/10000 >= 14 THEN set_config('enable_memoize','off',false) ELSE 'off' END AS enable_memoize;

-- Testing Index Scan backwards ----
-- We want more than 1 segment in atleast 1 of the chunks

CREATE TABLE metrics_ordered_idx (
    time timestamptz NOT NULL,
    device_id int,
    device_id_peer int,
    v0 int
);

SELECT create_hypertable ('metrics_ordered_idx', 'time', chunk_time_interval => '2days'::interval);

ALTER TABLE metrics_ordered_idx SET (timescaledb.compress, timescaledb.compress_orderby = 'time ASC', timescaledb.compress_segmentby = 'device_id,device_id_peer');

INSERT INTO metrics_ordered_idx (time, device_id, device_id_peer, v0)
SELECT time,
    device_id,
    0,
    device_id
FROM generate_series('2000-01-13 0:00:00+0'::timestamptz, '2000-01-15 23:55:00+0', '15m') gtime (time),
    generate_series(1, 5, 1) gdevice (device_id);

INSERT INTO metrics_ordered_idx (time, device_id, device_id_peer, v0)
SELECT generate_series('2000-01-20 0:00:00+0'::timestamptz, '2000-01-20 11:55:00+0', '15m'),
    3,
    3,
    generate_series(1, 5, 1);

INSERT INTO metrics_ordered_idx (time, device_id, device_id_peer, v0)
SELECT generate_series('2018-01-20 0:00:00+0'::timestamptz, '2018-01-20 11:55:00+0', '15m'),
    4,
    5,
    generate_series(1, 5, 1);

INSERT INTO metrics_ordered_idx (time, device_id, device_id_peer, v0)
SELECT '2020-01-01 0:00:00+0',
    generate_series(4, 7, 1),
    5,
    generate_series(1, 5, 1);

-- misisng values device_id = 7
CREATE TABLE device_tbl (
    device_id int,
    descr text
);

INSERT INTO device_tbl
SELECT generate_series(1, 6, 1),
    'devicex';

INSERT INTO device_tbl
SELECT 8,
    'device8';

ANALYZE device_tbl;

-- table for joins ---
CREATE TABLE nodetime (
    node int,
    start_time timestamp,
    stop_time timestamp
);

INSERT INTO nodetime
    VALUES (4, '2018-01-06 00:00'::timestamp, '2018-12-02 12:00'::timestamp);

-- run queries on uncompressed hypertable and store result
\set PREFIX ''
\set PREFIX_VERBOSE ''
\set ECHO none
SET client_min_messages TO error;

\o :TEST_RESULTS_UNCOMPRESSED
\ir include/transparent_decompression_ordered_index.sql
\ir include/transparent_decompression_constraintaware.sql
\o
RESET client_min_messages;

\set ECHO all
--compress all chunks for metrics_ordered_idx table --
SELECT compress_chunk (c.schema_name || '.' || c.table_name)
FROM _timescaledb_catalog.chunk c
    INNER JOIN _timescaledb_catalog.hypertable ht ON c.hypertable_id = ht.id
WHERE ht.table_name = 'metrics_ordered_idx'
ORDER BY c.id;
-- reindexing compressed hypertable to update statistics
DO
$$
DECLARE
  hyper_id int;
BEGIN
  SELECT h.compressed_hypertable_id
  INTO hyper_id
  FROM _timescaledb_catalog.hypertable h
  WHERE h.table_name = 'metrics_ordered_idx';
  EXECUTE format('REINDEX TABLE _timescaledb_internal._compressed_hypertable_%s',
    hyper_id);
END;
$$;

-- run queries on compressed hypertable and store result
\set PREFIX ''
\set PREFIX_VERBOSE ''
\set ECHO none
SET client_min_messages TO error;

SET enable_seqscan = FALSE;

\o :TEST_RESULTS_COMPRESSED
\ir include/transparent_decompression_ordered_index.sql
\ir include/transparent_decompression_constraintaware.sql
SET enable_seqscan = TRUE;

\o
RESET client_min_messages;

\set ECHO all
-- diff compressed and uncompressed results
:DIFF_CMD

-- This is to illustrate that we have some null device_id values. This fact
-- might influence the runtime chunk exclusion when doing joins on device_id.
select count(*) from metrics_ordered_idx
where extract(minute from time) = 0 and device_id is null
;

\set PREFIX 'EXPLAIN (analyze, costs off, timing off, summary off)'
\set PREFIX_VERBOSE 'EXPLAIN (analyze, costs off, timing off, summary off, verbose)'
-- we disable parallelism here otherwise EXPLAIN ANALYZE output
-- will be not stable and differ depending on worker assignment

SET max_parallel_workers_per_gather TO 0;

SET enable_seqscan = FALSE;

-- get explain for queries on hypertable with compression
\ir include/transparent_decompression_ordered_indexplan.sql
SET enable_seqscan = FALSE;

\ir include/transparent_decompression_ordered_index.sql
SET enable_seqscan = TRUE;

\ir include/transparent_decompression_constraintaware.sql

-- github bug 2917 with UNION ALL that references compressed ht
CREATE TABLE entity
(
  oid bigint PRIMARY KEY,
  type text,
  name text
);

INSERT INTO entity values(10, 'VMEM', 'cpu');

CREATE TABLE entity_m2
(
 timec         timestamp with time zone  NOT NULL,
 entity_oid    bigint                    ,
 entity_hash   bigint                    ,
 type          text               ,
 current       double precision,
 capacity      double precision,
 utilization   double precision,
 peak          double precision
);

SELECT create_hypertable('entity_m2', 'timec', chunk_time_interval=>'30 days'::interval);

INSERT INTO entity_m2 values (
 '2020-12-21 15:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0,  2097152 ,           0 ,  100);
INSERT INTO entity_m2 values (
 '2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0,  2097152 ,           0 ,  100);

ALTER TABLE entity_m2 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');

SELECT compress_chunk(c) FROM show_chunks('entity_m2') c;

CREATE TABLE entity_m1
(
timec         timestamp with time zone  ,
 entity_oid    bigint                    ,
 entity_hash   bigint                    ,
 type          text               ,
 current       double precision,
 capacity      double precision,
 utilization   double precision
);

SELECT create_hypertable('entity_m1', 'timec', chunk_time_interval=>'30 days'::interval);
INSERT INTO entity_m1 values (
 '2020-12-21 16:47:58.778-05' , 10 , -7792214420424674003 , 'VMEM' , 0,  100 ,           0 );


create view metric_view as
 SELECT m2.timec,
    m2.entity_oid,
    m2.entity_hash,
    m2.type,
    m2.current,
    m2.capacity,
    m2.utilization,
    m2.peak
   FROM entity_m2 m2
UNION ALL
 SELECT m1.timec,
    m1.entity_oid,
    m1.entity_hash,
    m1.type,
    m1.current,
    m1.capacity,
    m1.utilization,
    NULL::double precision AS peak
   FROM entity_m1 m1;

SET enable_bitmapscan = false;
SET enable_hashjoin = false;
SET enable_mergejoin = false;
SELECT m.timec, avg(m.utilization) AS avg_util
   FROM  metric_view m, entity e
   WHERE m.type = 'VMEM'
   AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
   AND m.entity_oid = e.oid
   GROUP BY 1 ORDER BY 1;

--now compress the other table too and rerun the query --
ALTER TABLE entity_m1 SET (timescaledb.compress,
timescaledb.compress_segmentby = 'entity_oid',
timescaledb.compress_orderby = 'type, timec');
SELECT compress_chunk(c) FROM show_chunks('entity_m1') c;
SELECT m.timec, avg(m.utilization) AS avg_util
   FROM  metric_view m, entity e
   WHERE m.type = 'VMEM'
   AND m.timec BETWEEN '2020-12-21T00:00:00'::timestamptz - interval '7 day' AND date_trunc('day', '2020-12-22T00:00:00'::timestamptz)
   AND m.entity_oid = e.oid
   GROUP BY 1 ORDER BY 1;
RESET enable_bitmapscan ;
RESET enable_hashjoin ;
RESET enable_mergejoin;

-- end github bug 2917