Move ANALYZE after heap scan during compression

Depending on the statistics target, running ANALYZE on a chunk before
compression can cause a lot of random IO operations for chunks with
more pages than ANALYZE needs to read. By moving that operation to
after the heap is loaded into memory for sorting, we increase the
chance of hitting the cache and reduce the disk operations necessary
to execute compression jobs.
Ante Kresic 2022-09-26 14:53:01 +02:00 committed by Ante Kresic
parent 9c819882f3
commit cc110a33a2
3 changed files with 31 additions and 17 deletions
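
In outline, the patch moves the chunk ANALYZE from before compression, where it
sampled cold pages at random, to immediately after the full heap scan that feeds
the tuplesort. The sketch below condenses that ordering; it is a simplified
illustration, not the committed code: sort_then_analyze() is a hypothetical
name, the table-AM scan API stands in for the older heap_* calls used in
compression.c, and run_analyze_on_chunk() is the helper the diff adds below.

#include <postgres.h>
#include <access/tableam.h>
#include <executor/tuptable.h>
#include <utils/rel.h>
#include <utils/snapmgr.h>
#include <utils/tuplesort.h>

static void run_analyze_on_chunk(Oid chunk_relid); /* added by this commit */

/* Simplified flow: read the whole chunk into the tuplesort first, then
 * run ANALYZE while the chunk's pages are still in shared buffers, so
 * the sampling reads mostly hit cache instead of going to disk. */
static void
sort_then_analyze(Relation in_rel, Tuplesortstate *tuplesortstate, TupleTableSlot *slot)
{
	TableScanDesc scan = table_beginscan(in_rel, GetLatestSnapshot(), 0, NULL);

	/* The sequential scan pulls every page of the chunk into cache. */
	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
		tuplesort_puttupleslot(tuplesortstate, slot);
	table_endscan(scan);

	/* ANALYZE now samples pages that were just read. */
	run_analyze_on_chunk(RelationGetRelid(in_rel));

	tuplesort_performsort(tuplesortstate);
}

The companion change is that the old pre-compression helper keeps only its
reloption duty: it now just sets autovacuum_enabled=false on the chunk, which
should stay empty while compressed, and is renamed disable_autovacuum_on_chunk.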

@@ -27,6 +27,7 @@ argument or resolve the type ambiguity by casting to the intended type.
 * #4720 Fix chunk exclusion for prepared statements and dst changes
 * #4738 Fix the assorted epoll_ctl() errors that could occur with COPY into a distributed hypertable
 * #4739 Fix continuous aggregate migrate check constraint
+* #4756 Improve compression job IO performance
 * #4745 Fix FK constraint violation error while insert into hypertable which references partitioned table
 **Thanks**

@@ -159,7 +159,7 @@ compresschunkcxt_init(CompressChunkCxt *cxt, Cache *hcache, Oid hypertable_relid
 }
 
 static void
-preserve_uncompressed_chunk_stats(Oid chunk_relid)
+disable_autovacuum_on_chunk(Oid chunk_relid)
 {
 	AlterTableCmd at_cmd = {
 		.type = T_AlterTableCmd,
@@ -167,20 +167,6 @@ preserve_uncompressed_chunk_stats(Oid chunk_relid)
 		.def = (Node *) list_make1(
 			makeDefElem("autovacuum_enabled", (Node *) makeString("false"), -1)),
 	};
-
-	VacuumRelation vr = {
-		.type = T_VacuumRelation,
-		.relation = NULL,
-		.oid = chunk_relid,
-		.va_cols = NIL,
-	};
-	VacuumStmt vs = {
-		.type = T_VacuumStmt,
-		.rels = list_make1(&vr),
-		.is_vacuumcmd = false,
-		.options = NIL,
-	};
-	ExecVacuum(NULL, &vs, true);
 
 	ts_alter_table_with_event_trigger(chunk_relid, NULL, list_make1(&at_cmd), false);
 }
@@ -234,8 +220,8 @@ compress_chunk_impl(Oid hypertable_relid, Oid chunk_relid)
 	LockRelationOid(cxt.compress_ht->main_table_relid, AccessShareLock);
 	LockRelationOid(cxt.srcht_chunk->table_id, ShareLock);
 
-	/* Perform an analyze on the chunk to get up-to-date stats before compressing */
-	preserve_uncompressed_chunk_stats(chunk_relid);
+	/* Disabling autovacuum on chunk which should be empty while in compressed state */
+	disable_autovacuum_on_chunk(chunk_relid);
 
 	/* acquire locks on catalog tables to keep till end of txn */
 	LockRelationOid(catalog_get_table_id(ts_catalog_get(), HYPERTABLE_COMPRESSION),

@@ -20,6 +20,7 @@
 #include <funcapi.h>
 #include <libpq/pqformat.h>
 #include <miscadmin.h>
+#include <nodes/pg_list.h>
 #include <storage/lmgr.h>
 #include <storage/predicate.h>
 #include <utils/builtins.h>
@@ -390,6 +391,7 @@ static void compress_chunk_populate_sort_info_for_column(Oid table,
 														 const ColumnCompressionInfo *column,
 														 AttrNumber *att_nums, Oid *sort_operator,
 														 Oid *collation, bool *nulls_first);
+static void run_analyze_on_chunk(Oid chunk_relid);
 
 static Tuplesortstate *
 compress_chunk_sort_relation(Relation in_rel, int n_keys, const ColumnCompressionInfo **keys)
@@ -441,6 +443,12 @@ compress_chunk_sort_relation(Relation in_rel, int n_keys, const ColumnCompressionInfo **keys)
 	heap_endscan(heapScan);
 
+	/* Perform an analyze on the chunk to get up-to-date stats before compressing.
+	 * We do it at this point because we've just read out the entire chunk into
+	 * tuplesort, so its pages are likely to be cached and we can save on I/O.
+	 */
+	run_analyze_on_chunk(in_rel->rd_id);
+
 	ExecDropSingleTupleTableSlot(heap_tuple_slot);
 
 	tuplesort_performsort(tuplesortstate);
@@ -488,6 +496,25 @@ compress_chunk_populate_sort_info_for_column(Oid table, const ColumnCompressionInfo *column,
 	ReleaseSysCache(tp);
 }
 
+static void
+run_analyze_on_chunk(Oid chunk_relid)
+{
+	VacuumRelation vr = {
+		.type = T_VacuumRelation,
+		.relation = NULL,
+		.oid = chunk_relid,
+		.va_cols = NIL,
+	};
+	VacuumStmt vs = {
+		.type = T_VacuumStmt,
+		.rels = list_make1(&vr),
+		.is_vacuumcmd = false,
+		.options = NIL,
+	};
+
+	ExecVacuum(NULL, &vs, true);
+}
+
 /********************
 ** row_compressor **
 ********************/