Move ANALYZE after heap scan during compression

Depending on the statistics target, running ANALYZE on a chunk before
compression can cause a lot of random IO operations for chunks with
more pages than ANALYZE needs to read. By moving that operation to
after the heap is loaded into memory for sorting, we increase the
chance of hitting the cache and reduce the disk operations necessary
to execute compression jobs.
Ante Kresic 2022-09-26 14:53:01 +02:00 committed by Ante Kresic
parent 9c819882f3
commit cc110a33a2
3 changed files with 31 additions and 17 deletions
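
In outline, the patch moves the chunk ANALYZE from before compression, where it
sampled cold pages at random, to immediately after the full heap scan that feeds
the tuplesort. The sketch below condenses that ordering; it is a simplified
illustration, not the committed code: sort_then_analyze() is a hypothetical
name, the table-AM scan API stands in for the older heap_* calls used in
compression.c, and run_analyze_on_chunk() is the helper the diff adds below.

#include <postgres.h>
#include <access/tableam.h>
#include <executor/tuptable.h>
#include <utils/rel.h>
#include <utils/snapmgr.h>
#include <utils/tuplesort.h>

static void run_analyze_on_chunk(Oid chunk_relid); /* added by this commit */

/* Simplified flow: read the whole chunk into the tuplesort first, then
 * run ANALYZE while the chunk's pages are still in shared buffers, so
 * the sampling reads mostly hit cache instead of going to disk. */
static void
sort_then_analyze(Relation in_rel, Tuplesortstate *tuplesortstate, TupleTableSlot *slot)
{
	TableScanDesc scan = table_beginscan(in_rel, GetLatestSnapshot(), 0, NULL);

	/* The sequential scan pulls every page of the chunk into cache. */
	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
		tuplesort_puttupleslot(tuplesortstate, slot);
	table_endscan(scan);

	/* ANALYZE now samples pages that were just read. */
	run_analyze_on_chunk(RelationGetRelid(in_rel));

	tuplesort_performsort(tuplesortstate);
}

The companion change is that the old pre-compression helper keeps only its
reloption duty: it now just sets autovacuum_enabled=false on the chunk, which
should stay empty while compressed, and is renamed disable_autovacuum_on_chunk.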

@@ -27,6 +27,7 @@ argument or resolve the type ambiguity by casting to the intended type.
 * #4720 Fix chunk exclusion for prepared statements and dst changes
 * #4738 Fix the assorted epoll_ctl() errors that could occur with COPY into a distributed hypertable
 * #4739 Fix continuous aggregate migrate check constraint
+* #4756 Improve compression job IO performance
 * #4745 Fix FK constraint violation error while insert into hypertable which references partitioned table
 **Thanks**

@@ -159,7 +159,7 @@ compresschunkcxt_init(CompressChunkCxt *cxt, Cache *hcache, Oid hypertable_relid
 }
 
 static void
-preserve_uncompressed_chunk_stats(Oid chunk_relid)
+disable_autovacuum_on_chunk(Oid chunk_relid)
 {
 	AlterTableCmd at_cmd = {
 		.type = T_AlterTableCmd,
@@ -167,20 +167,6 @@ preserve_uncompressed_chunk_stats(Oid chunk_relid)
 		.def = (Node *) list_make1(
 			makeDefElem("autovacuum_enabled", (Node *) makeString("false"), -1)),
 	};
-
-	VacuumRelation vr = {
-		.type = T_VacuumRelation,
-		.relation = NULL,
-		.oid = chunk_relid,
-		.va_cols = NIL,
-	};
-	VacuumStmt vs = {
-		.type = T_VacuumStmt,
-		.rels = list_make1(&vr),
-		.is_vacuumcmd = false,
-		.options = NIL,
-	};
-	ExecVacuum(NULL, &vs, true);
 
 	ts_alter_table_with_event_trigger(chunk_relid, NULL, list_make1(&at_cmd), false);
 }
@@ -234,8 +220,8 @@ compress_chunk_impl(Oid hypertable_relid, Oid chunk_relid)
 	LockRelationOid(cxt.compress_ht->main_table_relid, AccessShareLock);
 	LockRelationOid(cxt.srcht_chunk->table_id, ShareLock);
 
-	/* Perform an analyze on the chunk to get up-to-date stats before compressing */
-	preserve_uncompressed_chunk_stats(chunk_relid);
+	/* Disabling autovacuum on chunk which should be empty while in compressed state */
+	disable_autovacuum_on_chunk(chunk_relid);
 
 	/* acquire locks on catalog tables to keep till end of txn */
 	LockRelationOid(catalog_get_table_id(ts_catalog_get(), HYPERTABLE_COMPRESSION),

@@ -20,6 +20,7 @@
 #include <funcapi.h>
 #include <libpq/pqformat.h>
 #include <miscadmin.h>
+#include <nodes/pg_list.h>
 #include <storage/lmgr.h>
 #include <storage/predicate.h>
 #include <utils/builtins.h>
@@ -390,6 +391,7 @@ static void compress_chunk_populate_sort_info_for_column(Oid table,
 														 const ColumnCompressionInfo *column,
 														 AttrNumber *att_nums, Oid *sort_operator,
 														 Oid *collation, bool *nulls_first);
+static void run_analyze_on_chunk(Oid chunk_relid);
 
 static Tuplesortstate *
 compress_chunk_sort_relation(Relation in_rel, int n_keys, const ColumnCompressionInfo **keys)
@@ -441,6 +443,12 @@ compress_chunk_sort_relation(Relation in_rel, int n_keys, const ColumnCompressionInfo **keys)
 	heap_endscan(heapScan);
 
+	/* Perform an analyze on the chunk to get up-to-date stats before compressing.
+	 * We do it at this point because we've just read out the entire chunk into
+	 * tuplesort, so its pages are likely to be cached and we can save on I/O.
+	 */
+	run_analyze_on_chunk(in_rel->rd_id);
+
 	ExecDropSingleTupleTableSlot(heap_tuple_slot);
 
 	tuplesort_performsort(tuplesortstate);
@@ -488,6 +496,25 @@ compress_chunk_populate_sort_info_for_column(Oid table, const ColumnCompressionInfo *column,
 	ReleaseSysCache(tp);
 }
 
+static void
+run_analyze_on_chunk(Oid chunk_relid)
+{
+	VacuumRelation vr = {
+		.type = T_VacuumRelation,
+		.relation = NULL,
+		.oid = chunk_relid,
+		.va_cols = NIL,
+	};
+	VacuumStmt vs = {
+		.type = T_VacuumStmt,
+		.rels = list_make1(&vr),
+		.is_vacuumcmd = false,
+		.options = NIL,
+	};
+
+	ExecVacuum(NULL, &vs, true);
+}
+
 /********************
 ** row_compressor **
 ********************/