Add dump/restore support for Hypercore TAM

Add support for dumping and restoring hypertables that have chunks
that use the Hypercore TAM.

Dumping a Hypercore table requires special consideration because its
data is internally stored in two separate relations: one for
compressed data and one for non-compressed data. The TAM returns data
from both relations, but they may be dumped as separate tables. This
risks dumping the compressed data twice: once via the TAM and once via
the compressed table in compressed format.

The `pg_dump` tool uses `COPY TO` to create dumps of each table, and,
to avoid data duplication when used on Hypercore tables, this change
introduces a GUC that allows selecting one of these two behaviors:

1. A `COPY TO` on a Hypercore table returns all data via the TAM,
   including data stored in the compressed relation. A `COPY TO` on
   the internal compressed relation returns no data.

2. A `COPY TO` on a Hypercore returns only non-compressed data, while
   a `COPY TO` on the compressed relation returns compressed data. A
   `SELECT` still returns all the data as normal.

The second approach is the default because it is consistent with
compression when Hypercore TAM is not used. It will produce a
`pg_dump` archive that includes data in compressed form (if data was
compressed when dumped). Conversely, option (1) will produce an
archive that looks identical to a dump from a non-compressed table.

There are pros and cons of each dump format. A non-compressed archive
is a platform-agnostic logical dump that can be restored to any
platform and architecture, while a compressed archive includes data
that is compressed in a platform-dependent way and needs to be
restored to a compatible system.

A test is added that exercises both settings, including the corresponding
dump and restore cycles.
This commit is contained in:
Erik Nordström 2024-09-18 14:07:33 +02:00 committed by Erik Nordström
parent 67bcd2ea87
commit a38332a8af
10 changed files with 664 additions and 7 deletions

View File

@ -108,6 +108,12 @@ static const struct config_enum_entry transparent_decompression_options[] = {
{ NULL, 0, false }
};
/* Valid string values for the timescaledb.hypercore_copy_to_behavior GUC,
 * mapping user-visible names to HypercoreCopyToBehavior values. The array is
 * NULL-terminated as required by DefineCustomEnumVariable(). */
static const struct config_enum_entry hypercore_copy_to_options[] = {
{ "all_data", HYPERCORE_COPY_ALL_DATA, false },
{ "no_compressed_data", HYPERCORE_COPY_NO_COMPRESSED_DATA, false },
{ NULL, 0, false }
};
bool ts_guc_enable_deprecation_warnings = true;
bool ts_guc_enable_optimizations = true;
bool ts_guc_restoring = false;
@ -156,6 +162,8 @@ bool ts_guc_enable_tss_callbacks = true;
TSDLLEXPORT bool ts_guc_enable_delete_after_compression = false;
TSDLLEXPORT bool ts_guc_enable_merge_on_cagg_refresh = false;
TSDLLEXPORT char *ts_guc_hypercore_indexam_whitelist;
TSDLLEXPORT HypercoreCopyToBehavior ts_guc_hypercore_copy_to_behavior =
HYPERCORE_COPY_NO_COMPRESSED_DATA;
/* default value of ts_guc_max_open_chunks_per_insert and
* ts_guc_max_cached_chunks_per_hypertable will be set as their respective boot-value when the
@ -172,6 +180,7 @@ char *ts_last_tune_time = NULL;
char *ts_last_tune_version = NULL;
bool ts_guc_debug_require_batch_sorted_merge = false;
bool ts_guc_debug_allow_cagg_with_deprecated_funcs = false;
#ifdef TS_DEBUG
@ -1032,6 +1041,20 @@ _guc_init(void)
/* assign_hook= */ NULL,
/* show_hook= */ NULL);
DefineCustomEnumVariable(MAKE_EXTOPTION("hypercore_copy_to_behavior"),
"The behavior of COPY TO on a hypercore table",
"Set to 'all_data' to return both compressed and uncompressed data "
"via the Hypercore table when using COPY TO. Set to "
"'no_compressed_data' to skip compressed data.",
/* valueAddr= */ (int *) &ts_guc_hypercore_copy_to_behavior,
/* bootValue= */ HYPERCORE_COPY_NO_COMPRESSED_DATA,
/* options= */ hypercore_copy_to_options,
/* context= */ PGC_USERSET,
0,
NULL,
NULL,
NULL);
#ifdef TS_DEBUG
DefineCustomBoolVariable(/* name= */ MAKE_EXTOPTION("shutdown_bgw_scheduler"),
/* short_desc= */ "immediately shutdown the bgw scheduler",

View File

@ -124,6 +124,25 @@ extern TSDLLEXPORT bool ts_guc_debug_require_batch_sorted_merge;
extern TSDLLEXPORT bool ts_guc_debug_allow_cagg_with_deprecated_funcs;
extern TSDLLEXPORT char *ts_guc_hypercore_indexam_whitelist;
/*
 * Defines the behavior of COPY TO when used on a Hypercore table.
 *
 * If set to HYPERCORE_COPY_ALL_DATA, all data is copied from a Hypercore
 * table, including compressed data (but in uncompressed form) from the
 * internal compressed relation. When doing a COPY TO on the internal
 * compressed relation, no data is returned.
 *
 * If set to HYPERCORE_COPY_NO_COMPRESSED_DATA, then only uncompressed data
 * is copied (if any); compressed data is instead returned, in compressed
 * form, by a COPY TO on the internal compressed relation. This behavior is
 * compatible with compression without hypercore.
 */
typedef enum HypercoreCopyToBehavior
{
HYPERCORE_COPY_NO_COMPRESSED_DATA, /* default; GUC value "no_compressed_data" */
HYPERCORE_COPY_ALL_DATA, /* GUC value "all_data" */
} HypercoreCopyToBehavior;
/* Backing variable for the timescaledb.hypercore_copy_to_behavior GUC. */
extern TSDLLEXPORT HypercoreCopyToBehavior ts_guc_hypercore_copy_to_behavior;
void _guc_init(void);
typedef enum

View File

@ -1,6 +1,7 @@
DUMPFILE=${DUMPFILE:-$1}
EXTRA_PGOPTIONS=${EXTRA_PGOPTIONS:-$2}
# Override PGOPTIONS to remove verbose output
PGOPTIONS='--client-min-messages=warning'
PGOPTIONS="--client-min-messages=warning $EXTRA_PGOPTIONS"
export PGOPTIONS

View File

@ -1141,6 +1141,7 @@ fetch_unmatched_uncompressed_chunk_into_tuplesort(Tuplesortstate *segment_tuples
TableScanDesc scan;
TupleTableSlot *slot = table_slot_create(uncompressed_chunk_rel, NULL);
Snapshot snapshot = GetLatestSnapshot();
scan = table_beginscan(uncompressed_chunk_rel, snapshot, 0, NULL);
hypercore_scan_set_skip_compressed(scan, true);
@ -1209,11 +1210,10 @@ fetch_matching_uncompressed_chunk_into_tuplesort(Tuplesortstate *segment_tupleso
}
snapshot = GetLatestSnapshot();
/* Let compression TAM know it should only return tuples from the
* non-compressed relation. */
scan = table_beginscan(uncompressed_chunk_rel, snapshot, nsegbycols_nonnull, scankey);
hypercore_scan_set_skip_compressed(scan, true);
TupleTableSlot *slot = table_slot_create(uncompressed_chunk_rel, NULL);
while (table_scan_getnextslot(scan, ForwardScanDirection, slot))

View File

@ -62,6 +62,7 @@
#include "compression/compression.h"
#include "compression/create.h"
#include "debug_assert.h"
#include "extension.h"
#include "guc.h"
#include "hypercore_handler.h"
#include "relstats.h"
@ -80,6 +81,20 @@ static void convert_to_hypercore_finish(Oid relid);
static List *partially_compressed_relids = NIL; /* Relids that needs to have
* updated status set at end of
* transaction */
/*
* For COPY <hypercore_rel> TO commands, track the relid of the hypercore
* being copied from. It is needed to filter out compressed data in the COPY
* scan so that pg_dump does not dump compressed data twice: once in
* uncompressed format via the hypercore rel and once in compressed format in
* the internal compressed rel that gets dumped separately.
*/
static Oid hypercore_skip_compressed_data_relid = InvalidOid;
/*
 * Mark a Hypercore relation so that subsequent scans on it skip data stored
 * in the internal compressed relation.
 *
 * Used for COPY <hypercore> TO so that pg_dump does not dump compressed data
 * twice (once via the TAM and once via the internal compressed relation).
 * Only one relation is tracked at a time; the state is cleared when the scan
 * ends (see hypercore_endscan()).
 */
void
hypercore_skip_compressed_data_for_relation(Oid relid)
{
hypercore_skip_compressed_data_relid = relid;
}
static bool hypercore_truncate_compressed = true;
@ -188,7 +203,7 @@ static HypercoreInfo *
lazy_build_hypercore_info_cache(Relation rel, bool create_chunk_constraints,
bool *compressed_relation_created)
{
Assert(OidIsValid(rel->rd_id) && !ts_is_hypertable(rel->rd_id));
Assert(OidIsValid(rel->rd_id) && (!ts_extension_is_loaded() || !ts_is_hypertable(rel->rd_id)));
HypercoreInfo *hsinfo;
CompressionSettings *settings;
@ -519,6 +534,27 @@ get_scan_type(uint32 flags)
}
#endif
static inline bool
should_skip_compressed_data(const TableScanDesc scan)
{
/*
* Skip compressed data in a scan if any of these apply:
*
* 1. Transparent decompression (DecompressChunk) is enabled for
* Hypercore TAM.
*
* 2. The scan was started with a flag indicating no compressed data
* should be returned.
*
* 3. A COPY <hypercore> TO <file> on the Hypercore TAM table is executed
* and we want to ensure such commands issued by pg_dump doesn't lead
* to dumping compressed data twice.
*/
return (ts_guc_enable_transparent_decompression == 2) ||
RelationGetRelid(scan->rs_rd) == hypercore_skip_compressed_data_relid ||
(scan->rs_flags & SO_HYPERCORE_SKIP_COMPRESSED);
}
static TableScanDesc
hypercore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey keys,
ParallelTableScanDesc parallel_scan, uint32 flags)
@ -555,7 +591,7 @@ hypercore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key
HypercoreInfo *hsinfo = RelationGetHypercoreInfo(relation);
scan->compressed_rel = table_open(hsinfo->compressed_relid, AccessShareLock);
if ((ts_guc_enable_transparent_decompression == 2) || (flags & SO_HYPERCORE_SKIP_COMPRESSED))
if (should_skip_compressed_data(&scan->rs_base))
{
/*
* Don't read compressed data if transparent decompression is enabled
@ -564,8 +600,7 @@ hypercore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key
* Transparent decompression reads compressed data itself, directly
* from the compressed chunk, so avoid reading it again here.
*/
scan->hs_scan_state = HYPERCORE_SCAN_NON_COMPRESSED;
scan->rs_base.rs_flags |= SO_HYPERCORE_SKIP_COMPRESSED;
hypercore_scan_set_skip_compressed(&scan->rs_base, true);
}
initscan(scan, keys, nkeys);
@ -662,6 +697,9 @@ hypercore_endscan(TableScanDesc sscan)
pfree(scan->rs_base.rs_key);
pfree(scan);
/* Clear the COPY TO filter state */
hypercore_skip_compressed_data_relid = InvalidOid;
}
static bool
@ -3440,6 +3478,7 @@ hypercore_xact_event(XactEvent event, void *arg)
Ensure(OidIsValid(hsinfo->compressed_relid),
"hypercore \"%s\" has no compressed data relation",
get_rel_name(relid));
Chunk *chunk = ts_chunk_get_by_relid(relid, true);
ts_chunk_set_partial(chunk);
table_close(rel, NoLock);

View File

@ -29,6 +29,7 @@ extern Datum hypercore_handler(PG_FUNCTION_ARGS);
extern void hypercore_xact_event(XactEvent event, void *arg);
extern bool hypercore_set_truncate_compressed(bool onoff);
extern void hypercore_scan_set_skip_compressed(TableScanDesc scan, bool skip);
extern void hypercore_skip_compressed_data_for_relation(Oid relid);
typedef struct ColumnCompressionSettings
{

View File

@ -16,12 +16,99 @@
#include "compression/create.h"
#include "continuous_aggs/create.h"
#include "guc.h"
#include "hypercore/hypercore_handler.h"
#include "hypercore/utils.h"
#include "hypertable_cache.h"
#include "process_utility.h"
#include "ts_catalog/continuous_agg.h"
/*
 * Process a COPY (TO) on a table using Hypercore TAM.
 *
 * A table using Hypercore TAM stores data in two relations; one for
 * compressed data and one for non-compressed data. Normally, a COPY returns
 * all data (compressed and non-compressed in the Hypercore TAM case) via the
 * TAM. However, a pg_dump will also separately dump the internal compressed
 * relation, which risks dumping compressed data twice.
 *
 * When detecting a COPY TO command, we can do one of:
 *
 * 1. Return all data via the TAM but nothing via the internal relation.
 * 2. Return only non-compressed data via the TAM and compressed data (in
 * compressed format) via the internal relation.
 *
 * Option 2 is the default as that is compatible with compression without
 * Hypercore TAM.
 *
 * Always returns DDL_CONTINUE so that standard COPY processing still runs;
 * this function only installs filter state or rewrites the statement.
 */
static DDLResult
process_copy(ProcessUtilityArgs *args)
{
CopyStmt *stmt = castNode(CopyStmt, args->parsetree);
/* Only COPY <relation> TO needs handling; COPY FROM and COPY (query) TO
 * pass through untouched. */
if (!stmt->relation || stmt->is_from)
return DDL_CONTINUE;
Oid relid = RangeVarGetRelid(stmt->relation, NoLock, false);
Oid amoid = ts_get_rel_am(relid);
/* Check if this is the user-facing Hypercore TAM relation */
if (ts_is_hypercore_am(amoid))
{
if (ts_guc_hypercore_copy_to_behavior == HYPERCORE_COPY_NO_COMPRESSED_DATA)
{
/* Option 2: the compressed data will instead be dumped, in compressed
 * form, via the internal compressed relation. */
hypercore_skip_compressed_data_for_relation(relid);
ereport(NOTICE,
(errmsg("skipping compressed data when copying \"%s\"", get_rel_name(relid)),
errdetail(
"Use timescaledb.hypercore_copy_to_behavior to change this behavior.")));
}
}
else if (ts_guc_hypercore_copy_to_behavior == HYPERCORE_COPY_ALL_DATA)
{
/* Check if this is the internal compressed relation of a Hypercore
 * TAM */
const Chunk *chunk = ts_chunk_get_by_relid(relid, false);
if (!chunk)
return DDL_CONTINUE;
const Chunk *parent = ts_chunk_get_compressed_chunk_parent(chunk);
if (parent && ts_is_hypercore_am(ts_get_rel_am(parent->table_id)))
{
/* To avoid returning compressed data twice in a pg_dump, replace
 * the 'COPY <relation> TO' with 'COPY (select where false) TO' so
 * that the COPY on the internal compressed relation returns no
 * data. The data is instead returned in uncompressed form via the
 * parent hypercore relation. */
SelectStmt *select = makeNode(SelectStmt);
A_Const *aconst = makeNode(A_Const);
/* The constant-false WHERE clause is built differently across PG
 * versions: pre-PG15 has no boolean A_Const, so use integer 0. */
#if PG15_LT
aconst->val.type = T_Integer;
aconst->val.val.ival = 0;
#else
aconst->val.boolval.boolval = false;
aconst->val.boolval.type = T_Boolean;
#endif
select->whereClause = (Node *) aconst;
stmt->relation = NULL;
stmt->attlist = NIL;
stmt->query = (Node *) select;
ereport(NOTICE,
(errmsg("skipping data for internal Hypercore relation \"%s\"",
get_rel_name(chunk->table_id)),
errdetail("Use COPY TO on Hypercore relation \"%s\" to return data in "
"uncompressed form"
" or use timescaledb.hypercore_copy_to_behavior "
"to change this behavior.",
get_rel_name(parent->table_id))));
}
}
return DDL_CONTINUE;
}
DDLResult
tsl_ddl_command_start(ProcessUtilityArgs *args)
{
@ -99,6 +186,9 @@ tsl_ddl_command_start(ProcessUtilityArgs *args)
result = DDL_DONE;
break;
}
case T_CopyStmt:
result = process_copy(args);
break;
default:
break;
}

View File

@ -0,0 +1,318 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
select setseed(0.3);
setseed
---------
(1 row)
create table hyperdump (time timestamptz not null, device int, tempc float, tempf float generated always as (tempc * 2 + 30) stored, status text default 'idle');
select create_hypertable('hyperdump', by_range('time'), create_default_indexes => false);
create_hypertable
-------------------
(1,t)
(1 row)
insert into hyperdump (time, device, tempc)
select t, ceil(random()*10), random()*60
from generate_series('2022-06-01'::timestamptz, '2022-07-01', '5m') t;
create index time_device_idx on hyperdump (device, time desc);
alter table hyperdump set (
timescaledb.compress_orderby='time',
timescaledb.compress_segmentby='device');
\set TEST_BASE_NAME hypercore_pgdump
SELECT
format('%s/results/%s_results_original.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_ORIGINAL",
format('%s/results/%s_results_restored.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset
-- Save uncompressed table query output as a reference to compare against
\o :TEST_RESULTS_ORIGINAL
select * from hyperdump order by time, device;
\o
explain (costs off)
select * from hyperdump where device = 2 and time < '2022-06-03';
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Append
-> Index Scan using _hyper_1_1_chunk_time_device_idx on _hyper_1_1_chunk
Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone))
-> Index Scan using _hyper_1_2_chunk_time_device_idx on _hyper_1_2_chunk
Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone))
(5 rows)
-- Convert to hypercore
select compress_chunk(ch, hypercore_use_access_method=>true) from show_chunks('hyperdump') ch;
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
_timescaledb_internal._hyper_1_2_chunk
_timescaledb_internal._hyper_1_3_chunk
_timescaledb_internal._hyper_1_4_chunk
_timescaledb_internal._hyper_1_5_chunk
_timescaledb_internal._hyper_1_6_chunk
(6 rows)
reindex table hyperdump;
explain (costs off)
select * from hyperdump where device = 2 and time < '2022-06-03';
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Append
-> Index Scan using _hyper_1_1_chunk_time_device_idx on _hyper_1_1_chunk
Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone))
-> Index Scan using _hyper_1_2_chunk_time_device_idx on _hyper_1_2_chunk
Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone))
(5 rows)
\set ON_ERROR_STOP 0
-- Test unsupported GUC values
set timescaledb.hypercore_copy_to_behavior=0;
ERROR: invalid value for parameter "timescaledb.hypercore_copy_to_behavior": "0"
set timescaledb.hypercore_copy_to_behavior=null;
ERROR: syntax error at or near "null" at character 44
set timescaledb.hypercore_copy_to_behavior='dummy';
ERROR: invalid value for parameter "timescaledb.hypercore_copy_to_behavior": "dummy"
\set ON_ERROR_STOP 1
set timescaledb.hypercore_copy_to_behavior='all_data';
select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset
\d+ :chunk
Table "_timescaledb_internal._hyper_1_3_chunk"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+--------------------------+-----------+----------+---------------------------------------------------------------------------------+----------+--------------+-------------
time | timestamp with time zone | | not null | | plain | |
device | integer | | | | plain | |
tempc | double precision | | | | plain | |
tempf | double precision | | | generated always as (tempc * 2::double precision + 30::double precision) stored | plain | |
status | text | | | 'idle'::text | extended | |
Indexes:
"_hyper_1_3_chunk_time_device_idx" btree (device, "time" DESC)
Check constraints:
"constraint_3" CHECK ("time" >= 'Wed Jun 08 17:00:00 2022 PDT'::timestamp with time zone AND "time" < 'Wed Jun 15 17:00:00 2022 PDT'::timestamp with time zone)
Inherits: hyperdump
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset
-- This should not return any data when in this COPY mode.
copy :cchunk to stdout;
NOTICE: skipping data for internal Hypercore relation "compress_hyper_2_9_chunk"
---
-- Create a "compressed" dump where only uncompressed data is
-- returned dumped via the TAM relation. The rest of the data is
-- dumped via the internal compressed relation. This is compatible
-- with compression without TAM.
--
-- When restoring from the compressed dump, it will create hypercore
-- relations that are also compressed.
--
\c postgres :ROLE_SUPERUSER
\! utils/pg_dump_aux_dump.sh dump/hypercore-dump-compress.sql -ctimescaledb.hypercore_copy_to_behavior='no_compressed_data'
\c :TEST_DBNAME
create extension timescaledb;
select timescaledb_pre_restore();
timescaledb_pre_restore
-------------------------
t
(1 row)
\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-compress.sql
select timescaledb_post_restore();
timescaledb_post_restore
--------------------------
t
(1 row)
-- Background workers restarted by post restore, so stop again
select _timescaledb_functions.stop_background_workers();
stop_background_workers
-------------------------
t
(1 row)
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
\o :TEST_RESULTS_RESTORED
select * from hyperdump order by time, device;
\o
select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset
\d+ hyperdump
Table "public.hyperdump"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+--------------------------+-----------+----------+---------------------------------------------------------------------------------+----------+--------------+-------------
time | timestamp with time zone | | not null | | plain | |
device | integer | | | | plain | |
tempc | double precision | | | | plain | |
tempf | double precision | | | generated always as (tempc * 2::double precision + 30::double precision) stored | plain | |
status | text | | | 'idle'::text | extended | |
Indexes:
"time_device_idx" btree (device, "time" DESC)
Triggers:
ts_insert_blocker BEFORE INSERT ON hyperdump FOR EACH ROW EXECUTE FUNCTION _timescaledb_functions.insert_blocker()
Child tables: _timescaledb_internal._hyper_1_1_chunk,
_timescaledb_internal._hyper_1_2_chunk,
_timescaledb_internal._hyper_1_3_chunk,
_timescaledb_internal._hyper_1_4_chunk,
_timescaledb_internal._hyper_1_5_chunk,
_timescaledb_internal._hyper_1_6_chunk
\d+ :chunk
Table "_timescaledb_internal._hyper_1_3_chunk"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+--------------------------+-----------+----------+---------------------------------------------------------------------------------+----------+--------------+-------------
time | timestamp with time zone | | not null | | plain | |
device | integer | | | | plain | |
tempc | double precision | | | | plain | |
tempf | double precision | | | generated always as (tempc * 2::double precision + 30::double precision) stored | plain | |
status | text | | | 'idle'::text | extended | |
Indexes:
"_hyper_1_3_chunk_time_device_idx" btree (device, "time" DESC)
Check constraints:
"constraint_3" CHECK ("time" >= 'Wed Jun 08 17:00:00 2022 PDT'::timestamp with time zone AND "time" < 'Wed Jun 15 17:00:00 2022 PDT'::timestamp with time zone)
Inherits: hyperdump
explain (costs off)
select * from hyperdump where time < '2022-06-03';
QUERY PLAN
------------------------------------------------------------------------------------------------
Append
-> Custom Scan (ColumnarScan) on _hyper_1_1_chunk
Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on _hyper_1_2_chunk
Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)
(5 rows)
reindex table hyperdump;
explain (costs off)
select * from hyperdump where time < '2022-06-03';
QUERY PLAN
------------------------------------------------------------------------------------------------
Append
-> Custom Scan (ColumnarScan) on _hyper_1_1_chunk
Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on _hyper_1_2_chunk
Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)
(5 rows)
select format('\! diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset
-- Original output and restored output should be the same, i.e., no
-- diff
:DIFF_CMD
SELECT format('%s/results/%s_results_restored_2.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset
reindex table hyperdump;
explain (costs off)
select * from hyperdump where device = 2 and time < '2022-06-03';
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Append
-> Index Scan using _hyper_1_1_chunk_time_device_idx on _hyper_1_1_chunk
Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone))
-> Index Scan using _hyper_1_2_chunk_time_device_idx on _hyper_1_2_chunk
Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone))
(5 rows)
---
-- Create an "uncompressed" dump where _all_ data is dumped via the
-- TAM relation. No data is dumped via the internal compressed
-- relation. This dump is compatible with uncompressed hypertables.
--
-- When restoring from the uncompressed dump, it will create
-- hypercore relations that are also uncompressed.
--
\c postgres :ROLE_SUPERUSER
\! utils/pg_dump_aux_dump.sh dump/hypercore-dump-uncompress.sql -ctimescaledb.hypercore_copy_to_behavior='all_data'
\c :TEST_DBNAME
create extension timescaledb;
select timescaledb_pre_restore();
timescaledb_pre_restore
-------------------------
t
(1 row)
\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-uncompress.sql
select timescaledb_post_restore();
timescaledb_post_restore
--------------------------
t
(1 row)
-- Background workers restarted by post restore, so stop again
select _timescaledb_functions.stop_background_workers();
stop_background_workers
-------------------------
t
(1 row)
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset
-- If restore is OK, and TAM is used, we should see a ColumnarScan
explain (costs off)
select * from hyperdump order by time, device limit 10;
QUERY PLAN
--------------------------------------------------------------------
Limit
-> Sort
Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device
-> Append
-> Custom Scan (ColumnarScan) on _hyper_1_1_chunk
-> Custom Scan (ColumnarScan) on _hyper_1_2_chunk
-> Custom Scan (ColumnarScan) on _hyper_1_3_chunk
-> Custom Scan (ColumnarScan) on _hyper_1_4_chunk
-> Custom Scan (ColumnarScan) on _hyper_1_5_chunk
-> Custom Scan (ColumnarScan) on _hyper_1_6_chunk
(10 rows)
--
-- After restore, the status of the compressed chunks should be
-- partial since, with an uncompressed dump, the restore inserts data
-- via the hypercore relation in uncompressed form.
select c1.table_name, c1.status from _timescaledb_catalog.chunk c1
join _timescaledb_catalog.chunk c2 on (c1.compressed_chunk_id = c2.id)
order by c2.table_name;
table_name | status
------------------+--------
_hyper_1_4_chunk | 9
_hyper_1_5_chunk | 9
_hyper_1_6_chunk | 9
_hyper_1_1_chunk | 9
_hyper_1_2_chunk | 9
_hyper_1_3_chunk | 9
(6 rows)
-- Check that a compressed chunk holds no data
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset
-- Compressed relation should hold no data
select count(*) from :cchunk;
count
-------
0
(1 row)
-- Compress all chunks
select compress_chunk(ch) from show_chunks('hyperdump') ch;
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
_timescaledb_internal._hyper_1_2_chunk
_timescaledb_internal._hyper_1_3_chunk
_timescaledb_internal._hyper_1_4_chunk
_timescaledb_internal._hyper_1_5_chunk
_timescaledb_internal._hyper_1_6_chunk
(6 rows)
-- Data should now be compressed
select count(*) from :cchunk;
count
-------
10
(1 row)
-- Output data to a file for comparison with original contents
\o :TEST_RESULTS_RESTORED
select * from hyperdump order by time, device;
\o
select format('\! diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset
-- Outputs should be the same, i.e., no diff
:DIFF_CMD

View File

@ -149,6 +149,7 @@ if((${PG_VERSION_MAJOR} GREATER_EQUAL "15"))
hypercore_cursor.sql
hypercore_ddl.sql
hypercore_delete.sql
hypercore_dump_restore.sql
hypercore_index_btree.sql
hypercore_index_hash.sql
hypercore_insert.sql

View File

@ -0,0 +1,165 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
select setseed(0.3);
create table hyperdump (time timestamptz not null, device int, tempc float, tempf float generated always as (tempc * 2 + 30) stored, status text default 'idle');
select create_hypertable('hyperdump', by_range('time'), create_default_indexes => false);
insert into hyperdump (time, device, tempc)
select t, ceil(random()*10), random()*60
from generate_series('2022-06-01'::timestamptz, '2022-07-01', '5m') t;
create index time_device_idx on hyperdump (device, time desc);
alter table hyperdump set (
timescaledb.compress_orderby='time',
timescaledb.compress_segmentby='device');
\set TEST_BASE_NAME hypercore_pgdump
SELECT
format('%s/results/%s_results_original.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_ORIGINAL",
format('%s/results/%s_results_restored.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset
-- Save uncompressed table query output as a reference to compare against
\o :TEST_RESULTS_ORIGINAL
select * from hyperdump order by time, device;
\o
explain (costs off)
select * from hyperdump where device = 2 and time < '2022-06-03';
-- Convert to hypercore
select compress_chunk(ch, hypercore_use_access_method=>true) from show_chunks('hyperdump') ch;
reindex table hyperdump;
explain (costs off)
select * from hyperdump where device = 2 and time < '2022-06-03';
\set ON_ERROR_STOP 0
-- Test unsupported GUC values
set timescaledb.hypercore_copy_to_behavior=0;
set timescaledb.hypercore_copy_to_behavior=null;
set timescaledb.hypercore_copy_to_behavior='dummy';
\set ON_ERROR_STOP 1
set timescaledb.hypercore_copy_to_behavior='all_data';
select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset
\d+ :chunk
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset
-- This should not return any data when in this COPY mode.
copy :cchunk to stdout;
---
-- Create a "compressed" dump where only uncompressed data is
-- returned dumped via the TAM relation. The rest of the data is
-- dumped via the internal compressed relation. This is compatible
-- with compression without TAM.
--
-- When restoring from the compressed dump, it will create hypercore
-- relations that are also compressed.
--
\c postgres :ROLE_SUPERUSER
\! utils/pg_dump_aux_dump.sh dump/hypercore-dump-compress.sql -ctimescaledb.hypercore_copy_to_behavior='no_compressed_data'
\c :TEST_DBNAME
create extension timescaledb;
select timescaledb_pre_restore();
\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-compress.sql
select timescaledb_post_restore();
-- Background workers restarted by post restore, so stop again
select _timescaledb_functions.stop_background_workers();
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
\o :TEST_RESULTS_RESTORED
select * from hyperdump order by time, device;
\o
select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset
\d+ hyperdump
\d+ :chunk
explain (costs off)
select * from hyperdump where time < '2022-06-03';
reindex table hyperdump;
explain (costs off)
select * from hyperdump where time < '2022-06-03';
select format('\! diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset
-- Original output and restored output should be the same, i.e., no
-- diff
:DIFF_CMD
SELECT format('%s/results/%s_results_restored_2.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset
reindex table hyperdump;
explain (costs off)
select * from hyperdump where device = 2 and time < '2022-06-03';
---
-- Create an "uncompressed" dump where _all_ data is dumped via the
-- TAM relation. No data is dumped via the internal compressed
-- relation. This dump is compatible with uncompressed hypertables.
--
-- When restoring from the uncompressed dump, it will create
-- hypercore relations that are also uncompressed.
--
\c postgres :ROLE_SUPERUSER
\! utils/pg_dump_aux_dump.sh dump/hypercore-dump-uncompress.sql -ctimescaledb.hypercore_copy_to_behavior='all_data'
\c :TEST_DBNAME
create extension timescaledb;
select timescaledb_pre_restore();
\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-uncompress.sql
select timescaledb_post_restore();
-- Background workers restarted by post restore, so stop again
select _timescaledb_functions.stop_background_workers();
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset
-- If restore is OK, and TAM is used, we should see a ColumnarScan
explain (costs off)
select * from hyperdump order by time, device limit 10;
--
-- After restore, the status of the compressed chunks should be
-- partial since, with an uncompressed dump, the restore inserts data
-- via the hypercore relation in uncompressed form.
select c1.table_name, c1.status from _timescaledb_catalog.chunk c1
join _timescaledb_catalog.chunk c2 on (c1.compressed_chunk_id = c2.id)
order by c2.table_name;
-- Check that a compressed chunk holds no data
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset
-- Compressed relation should hold no data
select count(*) from :cchunk;
-- Compress all chunks
select compress_chunk(ch) from show_chunks('hyperdump') ch;
-- Data should now be compressed
select count(*) from :cchunk;
-- Output data to a file for comparison with original contents
\o :TEST_RESULTS_RESTORED
select * from hyperdump order by time, device;
\o
select format('\! diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset
-- Outputs should be the same, i.e., no diff
:DIFF_CMD