From 827684f3e2718c09e93914d60b88e79488374b33 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 15 Mar 2023 17:27:32 +0400 Subject: [PATCH] Use prepared statements for parameterized data node scans This allows us to avoid replanning the inner query on each new loop, speeding up the joins. --- .git-blame-ignore-revs | 6 + src/guc.c | 6 +- src/guc.h | 5 +- src/planner/planner.c | 10 +- test/src/test_utils.c | 77 +++- tsl/src/fdw/data_node_scan_exec.c | 4 + tsl/src/fdw/data_node_scan_plan.c | 27 +- tsl/src/fdw/fdw.c | 7 + tsl/src/fdw/scan_exec.c | 140 ++++-- tsl/src/fdw/scan_exec.h | 14 +- tsl/src/remote/CMakeLists.txt | 4 +- tsl/src/remote/connection.c | 59 ++- tsl/src/remote/copy_fetcher.c | 21 +- tsl/src/remote/cursor_fetcher.c | 14 +- tsl/src/remote/data_fetcher.c | 25 +- tsl/src/remote/data_fetcher.h | 27 +- tsl/src/remote/prepared_statement_fetcher.c | 416 +++++++++++++++++ tsl/src/remote/prepared_statement_fetcher.h | 17 + tsl/test/expected/data_fetcher.out | 29 +- tsl/test/expected/dist_param.out | 117 +++++ tsl/test/expected/dist_remote_error-12.out | 1 + tsl/test/expected/dist_remote_error-13.out | 1 + tsl/test/expected/dist_remote_error-14.out | 430 +++++++++++++++++ tsl/test/expected/dist_remote_error-15.out | 433 ++++++++++++++++++ tsl/test/expected/remote_connection.out | 2 +- .../shared/expected/dist_fetcher_type-12.out | 124 ++++- .../shared/expected/dist_fetcher_type-13.out | 124 ++++- .../shared/expected/dist_fetcher_type-14.out | 124 ++++- .../shared/expected/dist_fetcher_type-15.out | 124 ++++- .../shared/expected/dist_remote_error-12.out | 229 --------- .../shared/expected/dist_remote_error-13.out | 229 --------- .../shared/expected/dist_remote_error-14.out | 229 --------- .../shared/expected/dist_remote_error-15.out | 231 ---------- tsl/test/shared/sql/CMakeLists.txt | 3 +- tsl/test/shared/sql/dist_fetcher_type.sql.in | 80 +++- .../sql/include/dist_remote_error_setup.sql | 79 ---- tsl/test/sql/.gitignore | 1 + tsl/test/sql/CMakeLists.txt | 1 + tsl/test/sql/data_fetcher.sql | 17 +- tsl/test/sql/dist_param.sql | 83 ++++ .../{shared => }/sql/dist_remote_error.sql.in | 173 +++++-- .../sql/include/dist_remote_error_setup.sql | 122 +++++ 42 files changed, 2685 insertions(+), 1180 deletions(-) create mode 100644 tsl/src/remote/prepared_statement_fetcher.c create mode 100644 tsl/src/remote/prepared_statement_fetcher.h create mode 120000 tsl/test/expected/dist_remote_error-12.out create mode 120000 tsl/test/expected/dist_remote_error-13.out create mode 100644 tsl/test/expected/dist_remote_error-14.out create mode 100644 tsl/test/expected/dist_remote_error-15.out delete mode 100644 tsl/test/shared/expected/dist_remote_error-12.out delete mode 100644 tsl/test/shared/expected/dist_remote_error-13.out delete mode 100644 tsl/test/shared/expected/dist_remote_error-14.out delete mode 100644 tsl/test/shared/expected/dist_remote_error-15.out delete mode 100644 tsl/test/shared/sql/include/dist_remote_error_setup.sql rename tsl/test/{shared => }/sql/dist_remote_error.sql.in (53%) create mode 100644 tsl/test/sql/include/dist_remote_error_setup.sql diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 7f24a0d0c..42e46cc19 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -24,3 +24,9 @@ a4356f342f1732857a1d8057f71219b50f1919b2 # Cosmetic changes to create.c 230f368f4e5d146ce5f919cc5999b236997befaf +# Adding python and yaml linters +9133319081aef92705f1405087822fc281d215d4 
+44cd71a602ba96029001de6e97a1b44488730080 +f75a51def79796ff7fef58ec950c859fe4e71618 +21a3f8206c0de98932867096637c7d1e3d04d925 + diff --git a/src/guc.c b/src/guc.c index e4a9eb80e..34cdc89b9 100644 --- a/src/guc.c +++ b/src/guc.c @@ -42,9 +42,10 @@ static const struct config_enum_entry telemetry_level_options[] = { #endif static const struct config_enum_entry remote_data_fetchers[] = { + { "auto", AutoFetcherType, false }, { "copy", CopyFetcherType, false }, { "cursor", CursorFetcherType, false }, - { "auto", AutoFetcherType, false }, + { "prepared", PreparedStatementFetcherType, false }, { NULL, 0, false } }; @@ -337,8 +338,7 @@ _guc_init(void) DefineCustomBoolVariable("timescaledb.enable_parameterized_data_node_scan", "Enable parameterized data node scans", "Disable this as a workaround in case these plans are incorrectly " - "chosen " - "by the query planner when they are suboptimal", + "chosen by the query planner when they are suboptimal", &ts_guc_enable_parameterized_data_node_scan, true, PGC_USERSET, diff --git a/src/guc.h b/src/guc.h index a4268306a..97ae85488 100644 --- a/src/guc.h +++ b/src/guc.h @@ -61,9 +61,10 @@ extern TSDLLEXPORT bool ts_guc_enable_compression_indexscan; typedef enum DataFetcherType { - CursorFetcherType, + AutoFetcherType = 1, /* Skip 0 to better catch uninitialized values. */ CopyFetcherType, - AutoFetcherType, + CursorFetcherType, + PreparedStatementFetcherType, } DataFetcherType; extern TSDLLEXPORT DataFetcherType ts_guc_remote_data_fetcher; diff --git a/src/planner/planner.c b/src/planner/planner.c index 034328175..3a3187317 100644 --- a/src/planner/planner.c +++ b/src/planner/planner.c @@ -538,12 +538,14 @@ timescaledb_planner(Query *parse, int cursor_opts, ParamListInfo bound_params) if (context.num_distributed_tables >= 2) { - if (ts_guc_remote_data_fetcher == CopyFetcherType) + if (ts_guc_remote_data_fetcher != CursorFetcherType && + ts_guc_remote_data_fetcher != AutoFetcherType) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY fetcher not supported"), - errhint("COPY fetching of data is not supported in " + errmsg("only cursor fetcher is supported for this query"), + errhint("COPY or prepared statement fetching of data is not " + "supported in " "queries with multiple distributed hypertables." " Use cursor fetcher instead."))); } @@ -561,6 +563,8 @@ timescaledb_planner(Query *parse, int cursor_opts, ParamListInfo bound_params) } } } + + Assert(ts_data_node_fetcher_scan_type != AutoFetcherType); } if (prev_planner_hook != NULL) diff --git a/test/src/test_utils.c b/test/src/test_utils.c index 0add9df35..979622e05 100644 --- a/test/src/test_utils.c +++ b/test/src/test_utils.c @@ -87,7 +87,7 @@ ts_test_error_injection(PG_FUNCTION_ARGS) } static int -throw_after_n_rows(int max_rows, int severity) +transaction_row_counter(void) { static LocalTransactionId last_lxid = 0; static int rows_seen = 0; @@ -99,7 +99,13 @@ throw_after_n_rows(int max_rows, int severity) last_lxid = MyProc->lxid; } - rows_seen++; + return rows_seen++; +} + +static int +throw_after_n_rows(int max_rows, int severity) +{ + int rows_seen = transaction_row_counter(); if (max_rows <= rows_seen) { @@ -124,6 +130,24 @@ ts_debug_shippable_fatal_after_n_rows(PG_FUNCTION_ARGS) PG_RETURN_INT32(throw_after_n_rows(PG_GETARG_INT32(0), FATAL)); } +/* + * After how many rows should we error out according to the user-set option. 
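+ * Defaults to 7103, an arbitrary prime, when the option is not set.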
+ */ +static int +get_error_after_rows() +{ + int error_after = 7103; /* default is an arbitrary prime */ + + const char *error_after_option = + GetConfigOption("timescaledb.debug_broken_sendrecv_error_after", true, false); + if (error_after_option) + { + error_after = pg_strtoint32(error_after_option); + } + + return error_after; +} + /* * Broken send/receive functions for int4 that throw after an (arbitrarily * chosen prime or configured) number of rows. @@ -131,21 +155,12 @@ ts_debug_shippable_fatal_after_n_rows(PG_FUNCTION_ARGS) static void broken_sendrecv_throw() { - int throw_after = 7103; /* an arbitrary prime */ - const char *throw_after_option = - GetConfigOption("timescaledb.debug_broken_sendrecv_throw_after", true, false); - - if (throw_after_option) - { - throw_after = pg_strtoint32(throw_after_option); - } - /* * Use ERROR, not FATAL, because PG versions < 14 are unable to report a * FATAL error to the access node before closing the connection, so the test * results would be different. */ - (void) throw_after_n_rows(throw_after, ERROR); + (void) throw_after_n_rows(get_error_after_rows(), ERROR); } TS_FUNCTION_INFO_V1(ts_debug_broken_int4recv); @@ -166,11 +181,25 @@ ts_debug_broken_int4send(PG_FUNCTION_ARGS) return int4send(fcinfo); } -TS_FUNCTION_INFO_V1(ts_debug_sleepy_int4recv); +/* An incorrect int4out that sometimes returns not a number. */ +TS_FUNCTION_INFO_V1(ts_debug_incorrect_int4out); -/* Sleep after some rows. */ Datum -ts_debug_sleepy_int4recv(PG_FUNCTION_ARGS) +ts_debug_incorrect_int4out(PG_FUNCTION_ARGS) +{ + int rows_seen = transaction_row_counter(); + + if (rows_seen >= get_error_after_rows()) + { + PG_RETURN_CSTRING("surprise"); + } + + return int4out(fcinfo); +} + +/* Sleeps after a certain number of calls. */ +static void +ts_debug_sleepy_function() { static LocalTransactionId last_lxid = 0; static int rows_seen = 0; @@ -184,7 +213,7 @@ ts_debug_sleepy_int4recv(PG_FUNCTION_ARGS) rows_seen++; - if (rows_seen >= 1000) + if (rows_seen >= 997) { (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, @@ -194,10 +223,26 @@ ts_debug_sleepy_int4recv(PG_FUNCTION_ARGS) rows_seen = 0; } +} +TS_FUNCTION_INFO_V1(ts_debug_sleepy_int4recv); + +Datum +ts_debug_sleepy_int4recv(PG_FUNCTION_ARGS) +{ + ts_debug_sleepy_function(); return int4recv(fcinfo); } +TS_FUNCTION_INFO_V1(ts_debug_sleepy_int4send); + +Datum +ts_debug_sleepy_int4send(PG_FUNCTION_ARGS) +{ + ts_debug_sleepy_function(); + return int4send(fcinfo); +} + TS_FUNCTION_INFO_V1(ts_bgw_wait); Datum ts_bgw_wait(PG_FUNCTION_ARGS) diff --git a/tsl/src/fdw/data_node_scan_exec.c b/tsl/src/fdw/data_node_scan_exec.c index 7e24b6d18..3a596030d 100644 --- a/tsl/src/fdw/data_node_scan_exec.c +++ b/tsl/src/fdw/data_node_scan_exec.c @@ -44,6 +44,9 @@ data_node_scan_begin(CustomScanState *node, EState *estate, int eflags) List *fdw_exprs = linitial(cscan->custom_exprs); List *recheck_quals = lsecond(cscan->custom_exprs); List *fdw_private = list_nth(cscan->custom_private, DataNodeScanFdwPrivate); + sss->fsstate.planned_fetcher_type = + intVal(list_nth(cscan->custom_private, DataNodeScanFetcherType)); + Assert(sss->fsstate.planned_fetcher_type != AutoFetcherType); if ((eflags & EXEC_FLAG_EXPLAIN_ONLY) && !ts_guc_enable_remote_explain) return; @@ -167,5 +170,6 @@ data_node_scan_state_create(CustomScan *cscan) dnss->async_state.fetch_data = fetch_data; dnss->fsstate.planned_fetcher_type = intVal(list_nth(cscan->custom_private, DataNodeScanFetcherType)); + Assert(dnss->fsstate.planned_fetcher_type != 
AutoFetcherType);
 	return (Node *) dnss;
 }
diff --git a/tsl/src/fdw/data_node_scan_plan.c b/tsl/src/fdw/data_node_scan_plan.c
index 5819f475c..17f400595 100644
--- a/tsl/src/fdw/data_node_scan_plan.c
+++ b/tsl/src/fdw/data_node_scan_plan.c
@@ -1669,11 +1669,34 @@ data_node_scan_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *best_
 						 "columns.")));
 
 	/* Should have determined the fetcher type by now. */
-	Assert(ts_data_node_fetcher_scan_type != AutoFetcherType);
+	DataFetcherType fetcher_type = ts_data_node_fetcher_scan_type;
+	Assert(fetcher_type != AutoFetcherType);
+
+	/* Check if we should use the prepared statement data fetcher. */
+	if (fetcher_type == CopyFetcherType && list_length(scaninfo.params_list) > 0 &&
+		ts_guc_remote_data_fetcher == AutoFetcherType)
+	{
+		/*
+		 * The path is parameterized by either Nested Loop params or InitPlan
+		 * params. We can distinguish the join case by the presence of
+		 * Path.param_info.
+		 *
+		 * For joins, it is optimal to use the Prepared Statement fetcher,
+		 * because this plan is likely to be run multiple times, and this
+		 * avoids re-planning the query on each inner loop.
+		 *
+		 * For InitPlans, the COPY fetcher would be more efficient. Currently
+		 * it is not technically possible to use it, because COPY statements
+		 * cannot be parameterized. We need support for this case in
+		 * deparsing, to encode the parameter values into the query itself.
+		 * For now, also use the Prepared Statement fetcher for this case,
+		 * because it does not prevent parallelism, unlike Cursor.
+		 */
+		fetcher_type = PreparedStatementFetcherType;
+	}
 
 	cscan->custom_private = list_make3(scaninfo.fdw_private,
 									   list_make1_int(scaninfo.systemcol),
-									   makeInteger(ts_data_node_fetcher_scan_type));
+									   makeInteger(fetcher_type));
 
 	return &cscan->scan.plan;
 }
diff --git a/tsl/src/fdw/fdw.c b/tsl/src/fdw/fdw.c
index 4f2369843..773c757b2 100644
--- a/tsl/src/fdw/fdw.c
+++ b/tsl/src/fdw/fdw.c
@@ -168,6 +168,13 @@ begin_foreign_scan(ForeignScanState *node, int eflags)
 
 	node->fdw_state = (TsFdwScanState *) palloc0(sizeof(TsFdwScanState));
 
+	/*
+	 * This is a per-chunk FDW scan, not a per-data-node scan, so we may scan
+	 * multiple tables per data node and therefore can only use the cursor
+	 * data fetcher.
+	 */
+	((TsFdwScanState *) node->fdw_state)->planned_fetcher_type = CursorFetcherType;
+
 	fdw_scan_init(&node->ss,
 				  node->fdw_state,
 				  fsplan->fs_relids,
diff --git a/tsl/src/fdw/scan_exec.c b/tsl/src/fdw/scan_exec.c
index 78b74e404..44b809cd6 100644
--- a/tsl/src/fdw/scan_exec.c
+++ b/tsl/src/fdw/scan_exec.c
@@ -20,6 +20,7 @@
 #include "utils.h"
 #include "remote/data_fetcher.h"
 #include "remote/copy_fetcher.h"
+#include "remote/prepared_statement_fetcher.h"
 #include "remote/cursor_fetcher.h"
 #include "guc.h"
 #include "planner.h"
@@ -114,6 +115,7 @@ create_data_fetcher(ScanState *ss, TsFdwScanState *fsstate)
 	{
 		oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
 		fill_query_params_array(econtext, fsstate->param_flinfo, fsstate->param_exprs, values);
+		MemoryContextSwitchTo(oldcontext);
 
 		/*
 		 * Notice that we do not specify param types, thus forcing the data
 		 * node to infer types for all parameters. Since we explicitly cast
 		 * every parameter (see deparse.c), the "inference" is trivial and
 		 * will produce the desired result. This allows us to avoid assuming
 		 * that the data node has the same OIDs we do for the parameters'
 		 * types.
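+		 * (For example, the deparsed remote SQL pins each parameter's type
+		 * with an explicit cast, as in "($1::integer = id)", so no agreement
+		 * on type OIDs is required.)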
*/ params = stmt_params_create_from_values(values, num_params); - MemoryContextSwitchTo(oldcontext); } oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory); - TupleFactory *tf = tuplefactory_create_for_scan(ss, fsstate->retrieved_attrs); - - if (!tuplefactory_is_binary(tf) && fsstate->planned_fetcher_type == CopyFetcherType) - { - if (ts_guc_remote_data_fetcher == AutoFetcherType) - { - /* - * The user-set fetcher type was auto, and the planner decided to - * use COPY fetcher, but at execution time (now) we found out - * there is no binary serialization for some data types. In this - * case we can revert to cursor fetcher which supports text - * serialization. - */ - fsstate->planned_fetcher_type = CursorFetcherType; - } - else - { - ereport(ERROR, - (errmsg("cannot use COPY fetcher because some of the column types do not " - "have binary serialization"))); - } - } - - /* - * COPY fetcher uses COPY statement that don't work with prepared - * statements. If this plan is parameterized, this means we'll have to - * revert to cursor fetcher. - */ - if (num_params > 0 && fsstate->planned_fetcher_type == CopyFetcherType) - { - if (ts_guc_remote_data_fetcher == AutoFetcherType) - { - fsstate->planned_fetcher_type = CursorFetcherType; - } - else - { - ereport(ERROR, - (errmsg("cannot use COPY fetcher because the plan is parameterized"), - errhint("Set \"timescaledb.remote_data_fetcher\" to \"cursor\" to explicitly " - "set the fetcher type or use \"auto\" to select the fetcher type " - "automatically."))); - } - } - if (fsstate->planned_fetcher_type == CursorFetcherType) { - fetcher = cursor_fetcher_create_for_scan(fsstate->conn, fsstate->query, params, tf); + fetcher = + cursor_fetcher_create_for_scan(fsstate->conn, fsstate->query, params, fsstate->tf); + } + else if (fsstate->planned_fetcher_type == PreparedStatementFetcherType) + { + fetcher = prepared_statement_fetcher_create_for_scan(fsstate->conn, + fsstate->query, + params, + fsstate->tf); } else { @@ -184,7 +149,7 @@ create_data_fetcher(ScanState *ss, TsFdwScanState *fsstate) * point, so we shouldn't see 'auto' here. */ Assert(fsstate->planned_fetcher_type == CopyFetcherType); - fetcher = copy_fetcher_create_for_scan(fsstate->conn, fsstate->query, params, tf); + fetcher = copy_fetcher_create_for_scan(fsstate->conn, fsstate->query, params, fsstate->tf); } fsstate->fetcher = fetcher; @@ -319,6 +284,58 @@ fdw_scan_init(ScanState *ss, TsFdwScanState *fsstate, Bitmapset *scanrelids, Lis &fsstate->param_values); fsstate->fetcher = NULL; + + fsstate->tf = tuplefactory_create_for_scan(ss, fsstate->retrieved_attrs); + + Assert(fsstate->planned_fetcher_type != AutoFetcherType); + + /* + * If the planner tells us to use the cursor fetcher because there are + * multiple distributed hypertables per query, we have no other option. + */ + if (fsstate->planned_fetcher_type == CursorFetcherType) + { + return; + } + + if (!tuplefactory_is_binary(fsstate->tf) && fsstate->planned_fetcher_type == CopyFetcherType) + { + if (ts_guc_remote_data_fetcher == AutoFetcherType) + { + /* + * The user-set fetcher type was auto, and the planner decided to + * use COPY fetcher, but at execution time (now) we found out + * there is no binary serialization for some data types. In this + * case we can revert to cursor fetcher which supports text + * serialization. 
+		 */
+			fsstate->planned_fetcher_type = CursorFetcherType;
+		}
+		else
+		{
+			ereport(ERROR,
+					(errmsg("cannot use COPY fetcher because some of the column types do not "
+							"have binary serialization")));
+		}
+	}
+
+	/*
+	 * The COPY fetcher uses a COPY statement, which doesn't work with
+	 * prepared statements. We only end up here in case the COPY fetcher was
+	 * chosen by the user, so error out.
+	 * Note that this can be optimized for parameters coming from initplans,
+	 * where the parameter takes only one value and technically we could deparse
+	 * it into the query string and use a non-parameterized COPY statement.
+	 */
+	if (num_params > 0 && fsstate->planned_fetcher_type == CopyFetcherType)
+	{
+		Assert(ts_guc_remote_data_fetcher == CopyFetcherType);
+		ereport(ERROR,
+				(errmsg("cannot use COPY fetcher because the plan is parameterized"),
+				 errhint("Set \"timescaledb.remote_data_fetcher\" to \"cursor\" to explicitly "
+						 "set the fetcher type or use \"auto\" to select the fetcher type "
+						 "automatically.")));
+	}
 }
 
 TupleTableSlot *
@@ -343,6 +360,7 @@ fdw_scan_rescan(ScanState *ss, TsFdwScanState *fsstate)
 	/* If we haven't created the cursor yet, nothing to do. */
 	if (NULL == fsstate->fetcher)
 		return;
+
 	/*
 	 * If any internal parameters affecting this node have changed, we'd
 	 * better destroy and recreate the cursor. Otherwise, rewinding it should
 	 * suffice.
 	 */
 	if (ss->ps.chgParam != NULL)
 	{
-		data_fetcher_free(fsstate->fetcher);
-		fsstate->fetcher = NULL;
+		int num_params = fsstate->num_params;
+		Assert(num_params > 0);
+
+		ExprContext *econtext = ss->ps.ps_ExprContext;
+
+		/*
+		 * Construct array of query parameter values in text format.
+		 */
+		const char **values = fsstate->param_values;
+		fill_query_params_array(econtext, fsstate->param_flinfo, fsstate->param_exprs, values);
+
+		/*
+		 * Notice that we do not specify param types, thus forcing the data
+		 * node to infer types for all parameters. Since we explicitly cast
+		 * every parameter (see deparse.c), the "inference" is trivial and
+		 * will produce the desired result. This allows us to avoid assuming
+		 * that the data node has the same OIDs we do for the parameters'
+		 * types.
+		 */
+		StmtParams *params = stmt_params_create_from_values(values, num_params);
+
+		fetcher->funcs->rescan(fsstate->fetcher, params);
 	}
 	else
+	{
 		fetcher->funcs->rewind(fsstate->fetcher);
+	}
 }
 
 void
@@ -448,6 +488,8 @@ explain_fetcher_type(DataFetcherType type)
 			return "COPY";
 		case CursorFetcherType:
 			return "Cursor";
+		case PreparedStatementFetcherType:
+			return "Prepared statement";
 		default:
 			Assert(false);
 			return "";
diff --git a/tsl/src/fdw/scan_exec.h b/tsl/src/fdw/scan_exec.h
index e77c6a321..59bd56183 100644
--- a/tsl/src/fdw/scan_exec.h
+++ b/tsl/src/fdw/scan_exec.h
@@ -31,7 +31,8 @@ typedef struct TsFdwScanState
 	List *retrieved_attrs; /* list of retrieved attribute numbers */
 
 	/* for remote query execution */
-	struct TSConnection *conn;   /* connection for the scan */
+	struct TSConnection *conn; /* connection for the scan */
+	TupleFactory *tf;
 	struct DataFetcher *fetcher; /* fetches tuples from data node */
 	int num_params;				 /* number of parameters passed to query */
 	FmgrInfo *param_flinfo;		 /* output conversion functions for them */
@@ -39,10 +40,13 @@ typedef struct TsFdwScanState
 	const char **param_values; /* textual values of query parameters */
 	int fetch_size;			   /* number of tuples per fetch */
 	/*
-	 * The type of data fetcher to use.
Note that we still can revert to
-	 * cursor fetcher if COPY fetcher was chosen automatically, but binary
-	 * serialization turns out to be unavailable for some of the data types. We
-	 * only check this when we execute the query.
+	 * The type of data fetcher to use as determined by the planner. Can be
+	 * either Cursor when there are multiple distributed hypertables, or COPY.
+	 * Note that we can still revert to the cursor fetcher if binary
+	 * serialization is unavailable for some data types. We can also prefer
+	 * the prepared statement data fetcher when the query is parameterized.
+	 * We only check this when we execute the query.
 	 */
 	DataFetcherType planned_fetcher_type;
 	int row_counter;
diff --git a/tsl/src/remote/CMakeLists.txt b/tsl/src/remote/CMakeLists.txt
index 1bb485dde..9a55dffa9 100644
--- a/tsl/src/remote/CMakeLists.txt
+++ b/tsl/src/remote/CMakeLists.txt
@@ -2,6 +2,7 @@ set(SOURCES
   ${CMAKE_CURRENT_SOURCE_DIR}/async.c
   ${CMAKE_CURRENT_SOURCE_DIR}/connection.c
   ${CMAKE_CURRENT_SOURCE_DIR}/connection_cache.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/copy_fetcher.c
   ${CMAKE_CURRENT_SOURCE_DIR}/cursor_fetcher.c
   ${CMAKE_CURRENT_SOURCE_DIR}/data_fetcher.c
   ${CMAKE_CURRENT_SOURCE_DIR}/data_format.c
@@ -9,14 +10,15 @@ set(SOURCES
   ${CMAKE_CURRENT_SOURCE_DIR}/dist_commands.c
   ${CMAKE_CURRENT_SOURCE_DIR}/dist_copy.c
   ${CMAKE_CURRENT_SOURCE_DIR}/dist_ddl.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/prepared_statement_fetcher.c
   ${CMAKE_CURRENT_SOURCE_DIR}/copy_fetcher.c
   ${CMAKE_CURRENT_SOURCE_DIR}/healthcheck.c
   ${CMAKE_CURRENT_SOURCE_DIR}/stmt_params.c
   ${CMAKE_CURRENT_SOURCE_DIR}/tuplefactory.c
   ${CMAKE_CURRENT_SOURCE_DIR}/txn.c
-  ${CMAKE_CURRENT_SOURCE_DIR}/txn_store.c
   ${CMAKE_CURRENT_SOURCE_DIR}/txn_id.c
   ${CMAKE_CURRENT_SOURCE_DIR}/txn_resolve.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/txn_store.c
   ${CMAKE_CURRENT_SOURCE_DIR}/utils.c)
 target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES})
 target_include_directories(${TSL_LIBRARY_NAME} PRIVATE ${PG_INCLUDEDIR})
diff --git a/tsl/src/remote/connection.c b/tsl/src/remote/connection.c
index 0b9b2c152..c4bc8ad5d 100644
--- a/tsl/src/remote/connection.c
+++ b/tsl/src/remote/connection.c
@@ -190,6 +190,18 @@ fill_simple_error(TSConnectionError *err, int errcode, const char *errmsg, const
 
 	err->errcode = errcode;
 	err->msg = errmsg;
+	if (err->msg == NULL || strlen(err->msg) == 0)
+	{
+		char *connection_message = PQerrorMessage(conn->pg_conn);
+		if (connection_message)
+		{
+			err->msg = pstrdup(connection_message);
+		}
+		else
+		{
+			err->msg = "unknown error";
+		}
+	}
 
 	err->host = pstrdup(PQhost(conn->pg_conn));
 	err->nodename = pstrdup(remote_connection_node_name(conn));
@@ -324,8 +336,32 @@ fill_result_error(TSConnectionError *err, int errcode, const char *errmsg, const
 	err->remote.hint = get_error_field_copy(res, PG_DIAG_MESSAGE_HINT);
 	err->remote.context = get_error_field_copy(res, PG_DIAG_CONTEXT);
 	err->remote.stmtpos = get_error_field_copy(res, PG_DIAG_STATEMENT_POSITION);
-	if (err->remote.msg == NULL)
-		err->remote.msg = pstrdup(PQresultErrorMessage(res));
+	/*
+	 * Try to find at least some non-empty error message. The result error
+	 * message may not be set if a node segfaults.
+ */ + if (err->remote.msg == NULL || strlen(err->remote.msg) == 0) + { + char *result_message = PQresultErrorMessage(res); + if (result_message && strlen(result_message)) + { + err->remote.msg = pstrdup(result_message); + } + } + + if (err->remote.msg == NULL || strlen(err->remote.msg) == 0) + { + char *connection_message = PQerrorMessage(entry->conn->pg_conn); + if (connection_message && strlen(connection_message)) + { + err->remote.msg = pstrdup(connection_message); + } + } + + if (err->remote.msg == NULL || strlen(err->remote.msg) == 0) + { + err->remote.msg = "unknown error"; + } sqlstate = err->remote.sqlstate; @@ -708,6 +744,11 @@ static const char *default_connection_options[] = { "SET datestyle = ISO", "SET intervalstyle = postgres", "SET extra_float_digits = 3", + /* + * Prepared statement data fetcher sets it to "force" which might be + * suboptimal for other kinds of queries. + */ + "RESET plan_cache_mode", "SET statement_timeout = 0", NULL, }; @@ -910,13 +951,13 @@ remote_connection_node_name(const TSConnection *conn) void remote_connection_get_error(const TSConnection *conn, TSConnectionError *err) { - fill_connection_error(err, ERRCODE_CONNECTION_FAILURE, "", conn); + fill_connection_error(err, ERRCODE_CONNECTION_FAILURE, NULL, conn); } void remote_connection_get_result_error(const PGresult *res, TSConnectionError *err) { - fill_result_error(err, ERRCODE_CONNECTION_EXCEPTION, "", res); + fill_result_error(err, ERRCODE_CONNECTION_EXCEPTION, NULL, res); } static long @@ -2265,13 +2306,13 @@ remote_connection_begin_copy(TSConnection *conn, const char *copycmd, bool binar #ifndef NDEBUG /* Set some variables for testing. */ - const char *throw_after_option = - GetConfigOption("timescaledb.debug_broken_sendrecv_throw_after", true, false); - if (throw_after_option) + const char *error_after_option = + GetConfigOption("timescaledb.debug_broken_sendrecv_error_after", true, false); + if (error_after_option) { res = PQexec(pg_conn, - psprintf("set timescaledb.debug_broken_sendrecv_throw_after = '%s';", - throw_after_option)); + psprintf("set timescaledb.debug_broken_sendrecv_error_after = '%s';", + error_after_option)); if (PQresultStatus(res) != PGRES_COMMAND_OK) { remote_connection_get_result_error(res, err); diff --git a/tsl/src/remote/copy_fetcher.c b/tsl/src/remote/copy_fetcher.c index 88ab4e3b6..04a303df4 100644 --- a/tsl/src/remote/copy_fetcher.c +++ b/tsl/src/remote/copy_fetcher.c @@ -31,17 +31,18 @@ static int copy_fetcher_fetch_data(DataFetcher *df); static void copy_fetcher_set_fetch_size(DataFetcher *df, int fetch_size); static void copy_fetcher_set_tuple_memcontext(DataFetcher *df, MemoryContext mctx); static void copy_fetcher_store_next_tuple(DataFetcher *df, TupleTableSlot *slot); -static void copy_fetcher_rescan(DataFetcher *df); +static void copy_fetcher_rewind(DataFetcher *df); static void copy_fetcher_close(DataFetcher *df); static DataFetcherFuncs funcs = { - .send_fetch_request = copy_fetcher_send_fetch_request, + .close = copy_fetcher_close, .fetch_data = copy_fetcher_fetch_data, + .rescan = data_fetcher_rescan, + .rewind = copy_fetcher_rewind, + .send_fetch_request = copy_fetcher_send_fetch_request, .set_fetch_size = copy_fetcher_set_fetch_size, .set_tuple_mctx = copy_fetcher_set_tuple_memcontext, .store_next_tuple = copy_fetcher_store_next_tuple, - .rewind = copy_fetcher_rescan, - .close = copy_fetcher_close, }; static void @@ -113,7 +114,7 @@ copy_fetcher_send_fetch_request(DataFetcher *df) * Single-row mode doesn't really influence the COPY queries, but 
setting
 	 * it here is a convenient way to prevent concurrent COPY requests on the
 	 * same connection. This can happen if we have multiple tables on the same
-	 * data node and still use the row-by-row fetcher.
+	 * data node and still use the COPY fetcher.
 	 */
 	if (!async_request_set_single_row_mode(req))
 	{
@@ -122,7 +123,7 @@
 				 errmsg("could not set single-row mode on connection to \"%s\"",
 						remote_connection_node_name(fetcher->state.conn)),
 				 errdetail("The aborted statement is: %s.", fetcher->state.stmt),
-				 errhint("Copy fetcher is not supported together with sub-queries."
+				 errhint("COPY fetcher is not supported together with sub-queries."
 						 " Use cursor fetcher instead.")));
 	}
 
@@ -544,6 +545,12 @@ copy_fetcher_complete(CopyFetcher *fetcher)
 							attconv->typmods[att]);
 					nulls[att] = false;
 				}
+
+				/*
+				 * We expect one row per message here, so check that no data
+				 * is left over.
+				 */
+				Assert(copy_data.cursor == copy_data.len);
 			}
 			MemoryContextSwitchTo(fetcher->state.batch_mctx);
 			PQfreemem(copy_data.data);
@@ -670,7 +677,7 @@ copy_fetcher_close(DataFetcher *df)
 }
 
 static void
-copy_fetcher_rescan(DataFetcher *df)
+copy_fetcher_rewind(DataFetcher *df)
 {
 	CopyFetcher *fetcher = cast_fetcher(CopyFetcher, df);
 
diff --git a/tsl/src/remote/cursor_fetcher.c b/tsl/src/remote/cursor_fetcher.c
index 754be01ec..9c8ba731b 100644
--- a/tsl/src/remote/cursor_fetcher.c
+++ b/tsl/src/remote/cursor_fetcher.c
@@ -55,13 +55,14 @@ static void cursor_fetcher_rewind(DataFetcher *df);
 static void cursor_fetcher_close(DataFetcher *df);
 
 static DataFetcherFuncs funcs = {
-	.send_fetch_request = cursor_fetcher_send_fetch_request,
+	.close = cursor_fetcher_close,
 	.fetch_data = cursor_fetcher_fetch_data,
+	.rescan = data_fetcher_rescan,
+	.rewind = cursor_fetcher_rewind,
+	.send_fetch_request = cursor_fetcher_send_fetch_request,
 	.set_fetch_size = cursor_fetcher_set_fetch_size,
 	.set_tuple_mctx = cursor_fetcher_set_tuple_memcontext,
 	.store_next_tuple = cursor_fetcher_store_next_tuple,
-	.rewind = cursor_fetcher_rewind,
-	.close = cursor_fetcher_close,
 };
 
 static void
@@ -327,7 +328,14 @@ cursor_fetcher_fetch_data(DataFetcher *df)
 		return 0;
 
 	if (!cursor->state.open)
+	{
+		if (cursor->create_req == NULL)
+		{
+			cursor_create_req(cursor);
+		}
+
 		cursor_fetcher_wait_until_open(df);
+	}
 
 	if (cursor->state.data_req == NULL)
 		cursor_fetcher_send_fetch_request(df);
diff --git a/tsl/src/remote/data_fetcher.c b/tsl/src/remote/data_fetcher.c
index 6a6945e53..84cb2a3cc 100644
--- a/tsl/src/remote/data_fetcher.c
+++ b/tsl/src/remote/data_fetcher.c
@@ -28,11 +28,13 @@ data_fetcher_init(DataFetcher *df, TSConnection *conn, const char *stmt, StmtPar
 	df->tf = tf;
 	tuplefactory_set_per_tuple_mctx_reset(df->tf, false);
 
-	df->batch_mctx =
-		AllocSetContextCreate(CurrentMemoryContext, "cursor tuple data", ALLOCSET_DEFAULT_SIZES);
+	df->batch_mctx = AllocSetContextCreate(CurrentMemoryContext,
+										   "data fetcher tuple batch data",
+										   ALLOCSET_DEFAULT_SIZES);
 	df->tuple_mctx = df->batch_mctx;
-	df->req_mctx =
-		AllocSetContextCreate(CurrentMemoryContext, "async req/resp", ALLOCSET_DEFAULT_SIZES);
+	df->req_mctx = AllocSetContextCreate(CurrentMemoryContext,
+										 "data fetcher async request/response",
+										 ALLOCSET_DEFAULT_SIZES);
 
 	df->fetch_size = DEFAULT_FETCH_SIZE;
 }
@@ -44,7 +46,7 @@ data_fetcher_validate(DataFetcher *df)
 	if (df->next_tuple_idx != 0 && df->next_tuple_idx < df->num_tuples)
 		ereport(ERROR,
 				(errcode(ERRCODE_TS_INTERNAL_ERROR),
-				 errmsg("invalid cursor state. sql: %s", df->stmt),
+				 errmsg("invalid data fetcher state.
sql: %s", df->stmt), errhint("Shouldn't fetch new data before consuming existing."))); } @@ -112,6 +114,19 @@ data_fetcher_reset(DataFetcher *df) MemoryContextReset(df->batch_mctx); } +/* + * This is the default implementation of starting the scan with the new + * parameters. It just closes the current scan and updates the parameter + * values, and the next scan is initialized from scratch. The prepared statement + * fetcher is more efficient than that, and reuses the prepared statement. + */ +void +data_fetcher_rescan(DataFetcher *df, StmtParams *params) +{ + df->funcs->close(df); + df->stmt_params = params; +} + void data_fetcher_free(DataFetcher *df) { diff --git a/tsl/src/remote/data_fetcher.h b/tsl/src/remote/data_fetcher.h index 6db6b78af..3a4b861b2 100644 --- a/tsl/src/remote/data_fetcher.h +++ b/tsl/src/remote/data_fetcher.h @@ -20,17 +20,33 @@ typedef struct DataFetcher DataFetcher; typedef struct DataFetcherFuncs { + void (*close)(DataFetcher *data_fetcher); + + /* + * Read data in response to a fetch request. If no request has been sent, + * send it first. + */ + int (*fetch_data)(DataFetcher *data_fetcher); + + /* + * Restart the parameterized remote scan with the new parameter values. + */ + void (*rescan)(DataFetcher *data_fetcher, StmtParams *params); + + /* + * Restart the non-parameterized remote scan. This happens in some nested + * loop-type plans. Ideally we should materialize the data locally in this + * case, probably on plan level by putting a Materialize node above it. + */ + void (*rewind)(DataFetcher *data_fetcher); + /* Send a request for new data. This doesn't read the data itself */ void (*send_fetch_request)(DataFetcher *data_fetcher); - /* Read data in response to a fetch request. If no request has been sent, - * send it first. */ - int (*fetch_data)(DataFetcher *data_fetcher); + /* Set the fetch (batch) size */ void (*set_fetch_size)(DataFetcher *data_fetcher, int fetch_size); void (*set_tuple_mctx)(DataFetcher *data_fetcher, MemoryContext mctx); void (*store_next_tuple)(DataFetcher *data_fetcher, TupleTableSlot *slot); - void (*rewind)(DataFetcher *data_fetcher); - void (*close)(DataFetcher *data_fetcher); } DataFetcherFuncs; typedef struct DataFetcher @@ -71,6 +87,7 @@ extern void data_fetcher_set_fetch_size(DataFetcher *df, int fetch_size); extern void data_fetcher_set_tuple_mctx(DataFetcher *df, MemoryContext mctx); extern void data_fetcher_validate(DataFetcher *df); extern void data_fetcher_reset(DataFetcher *df); +extern void data_fetcher_rescan(DataFetcher *df, StmtParams *params); #ifdef USE_ASSERT_CHECKING static inline DataFetcher * diff --git a/tsl/src/remote/prepared_statement_fetcher.c b/tsl/src/remote/prepared_statement_fetcher.c new file mode 100644 index 000000000..74362714d --- /dev/null +++ b/tsl/src/remote/prepared_statement_fetcher.c @@ -0,0 +1,416 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#include + +#include "prepared_statement_fetcher.h" +#include "tuplefactory.h" +#include "async.h" + +typedef struct PreparedStatementFetcher +{ + DataFetcher state; + + /* Data for virtual tuples of the current retrieved batch. 
*/ + Datum *batch_values; + bool *batch_nulls; +} PreparedStatementFetcher; + +static void prepared_statement_fetcher_send_fetch_request(DataFetcher *df); +static void prepared_statement_fetcher_reset(PreparedStatementFetcher *fetcher); +static int prepared_statement_fetcher_fetch_data(DataFetcher *df); +static void prepared_statement_fetcher_set_fetch_size(DataFetcher *df, int fetch_size); +static void prepared_statement_fetcher_set_tuple_memcontext(DataFetcher *df, MemoryContext mctx); +static void prepared_statement_fetcher_store_next_tuple(DataFetcher *df, TupleTableSlot *slot); +static void prepared_statement_fetcher_rewind(DataFetcher *df); +static void prepared_statement_fetcher_rescan(DataFetcher *df, StmtParams *params); +static void prepared_statement_fetcher_close(DataFetcher *df); + +static DataFetcherFuncs funcs = { + .close = prepared_statement_fetcher_close, + .fetch_data = prepared_statement_fetcher_fetch_data, + .rescan = prepared_statement_fetcher_rescan, + .rewind = prepared_statement_fetcher_rewind, + .send_fetch_request = prepared_statement_fetcher_send_fetch_request, + .set_fetch_size = prepared_statement_fetcher_set_fetch_size, + .set_tuple_mctx = prepared_statement_fetcher_set_tuple_memcontext, + .store_next_tuple = prepared_statement_fetcher_store_next_tuple, +}; + +static void +prepared_statement_fetcher_set_fetch_size(DataFetcher *df, int fetch_size) +{ + PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df); + data_fetcher_set_fetch_size(&fetcher->state, fetch_size); +} + +static void +prepared_statement_fetcher_set_tuple_memcontext(DataFetcher *df, MemoryContext mctx) +{ + PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df); + data_fetcher_set_tuple_mctx(&fetcher->state, mctx); +} + +static void +prepared_statement_fetcher_reset(PreparedStatementFetcher *fetcher) +{ + /* Drain the connection, reporting any errors. 
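+	 * A previous fetch that did not run to completion may have left results
+	 * pending on the connection.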
+	 */
+	TSConnection *conn = fetcher->state.conn;
+	PGresult *res;
+	while ((res = remote_connection_get_result(conn, TS_NO_TIMEOUT)) != NULL)
+	{
+		char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+		if (sqlstate != NULL && strcmp(sqlstate, "00000") != 0)
+		{
+			remote_result_elog(res, ERROR);
+		}
+		PQclear(res);
+	}
+
+	fetcher->state.open = false;
+	data_fetcher_reset(&fetcher->state);
+}
+
+static void
+prepared_statement_fetcher_send_fetch_request(DataFetcher *df)
+{
+	PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df);
+
+	if (fetcher->state.open)
+	{
+		/* data request has already been sent */
+		Assert(fetcher->state.data_req != NULL);
+		return;
+	}
+
+	/* make sure to have a clean state */
+	prepared_statement_fetcher_reset(fetcher);
+
+	TSConnection *conn = fetcher->state.conn;
+	if (remote_connection_get_status(conn) != CONN_IDLE)
+	{
+		elog(ERROR, "unexpected activity on data node connection when sending fetch request");
+	}
+
+	PGresult *pgres = remote_connection_get_result(conn, TS_NO_TIMEOUT);
+	if (pgres != NULL)
+	{
+		char *sqlstate = PQresultErrorField(pgres, PG_DIAG_SQLSTATE);
+		if (sqlstate != NULL && strcmp(sqlstate, "00000") != 0)
+		{
+			remote_result_elog(pgres, ERROR);
+		}
+
+		elog(ERROR,
+			 "unexpected activity on data node connection when sending fetch request "
+			 "(PQresultStatus %d)",
+			 PQresultStatus(pgres));
+	}
+
+	PGconn *pg_conn = remote_connection_get_pg_conn(conn);
+	int ret = PQsendQueryPrepared(pg_conn,
+								  /* stmtName = */ "",
+								  stmt_params_num_params(fetcher->state.stmt_params),
+								  stmt_params_values(fetcher->state.stmt_params),
+								  stmt_params_lengths(fetcher->state.stmt_params),
+								  stmt_params_formats(fetcher->state.stmt_params),
+								  tuplefactory_is_binary(fetcher->state.tf) ? FORMAT_BINARY :
+																			  FORMAT_TEXT);
+
+	if (ret != 1)
+	{
+		TSConnectionError err;
+		remote_connection_get_error(conn, &err);
+		remote_connection_error_elog(&err, ERROR);
+	}
+
+	if (!remote_connection_set_single_row_mode(conn))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONNECTION_FAILURE),
+				 errmsg("could not set single-row mode on connection to \"%s\"",
+						remote_connection_node_name(fetcher->state.conn)),
+				 errdetail("The aborted statement is: %s.", fetcher->state.stmt),
+				 errhint("Row-by-row fetching of data is not supported together with sub-queries."
+						 " Use cursor fetcher instead.")));
+
+	fetcher->state.data_req = (void *) 1;
+	fetcher->state.open = true;
+}
+
+/*
+ * Process response for ongoing async request
+ */
+static int
+prepared_statement_fetcher_complete(PreparedStatementFetcher *fetcher)
+{
+	MemoryContext oldcontext;
+
+	Assert(fetcher->state.open);
+	Assert(fetcher->state.data_req != NULL);
+
+	data_fetcher_validate(&fetcher->state);
+
+	/*
+	 * We'll store the tuples in the batch_mctx. First, flush the previous
+	 * batch.
+ */ + MemoryContextReset(fetcher->state.batch_mctx); + oldcontext = MemoryContextSwitchTo(fetcher->state.batch_mctx); + const int nattrs = tuplefactory_get_nattrs(fetcher->state.tf); + const int total = nattrs * fetcher->state.fetch_size; + fetcher->batch_nulls = palloc(sizeof(bool) * total); + for (int i = 0; i < total; i++) + { + fetcher->batch_nulls[i] = true; + } + fetcher->batch_values = palloc0(sizeof(Datum) * total); + + TSConnection *conn = fetcher->state.conn; + PGconn *pg_conn = remote_connection_get_pg_conn(conn); + if (PQsetnonblocking(pg_conn, 0) != 0) + { + remote_connection_elog(conn, ERROR); + } + + PG_TRY(); + { + int i; + + for (i = 0; i < fetcher->state.fetch_size; i++) + { + PGresult *res; + + res = remote_connection_get_result(conn, TS_NO_TIMEOUT); + + if (!(PQresultStatus(res) == PGRES_SINGLE_TUPLE || + PQresultStatus(res) == PGRES_TUPLES_OK)) + { + remote_result_elog(res, ERROR); + } + + if (PQresultStatus(res) == PGRES_TUPLES_OK) + { + /* fetched all the data */ + Assert(PQntuples(res) == 0); + PQclear(res); + + fetcher->state.eof = true; + break; + } + + Assert(PQresultStatus(res) == PGRES_SINGLE_TUPLE); + /* Allow creating tuples in alternative memory context if user has set + * it explicitly, otherwise same as batch_mctx */ + MemoryContextSwitchTo(fetcher->state.tuple_mctx); + + PG_USED_FOR_ASSERTS_ONLY ItemPointer ctid = + tuplefactory_make_virtual_tuple(fetcher->state.tf, + res, + 0, + PQbinaryTuples(res), + &fetcher->batch_values[i * nattrs], + &fetcher->batch_nulls[i * nattrs]); + + /* + * This fetcher uses virtual tuples that can't hold ctid, so if we're + * receiving a ctid here, we're doing something wrong. + */ + Assert(ctid == NULL); + + PQclear(res); + } + /* We need to manually reset the context since we've turned off per tuple reset */ + tuplefactory_reset_mctx(fetcher->state.tf); + + fetcher->state.num_tuples = i; + fetcher->state.next_tuple_idx = 0; + fetcher->state.batch_count++; + + if (fetcher->state.eof) + { + fetcher->state.data_req = NULL; + } + } + PG_CATCH(); + { + if (NULL != fetcher->state.data_req) + { + fetcher->state.data_req = NULL; + } + + PG_RE_THROW(); + } + PG_END_TRY(); + + MemoryContextSwitchTo(oldcontext); + + return fetcher->state.num_tuples; +} + +static int +prepared_statement_fetcher_fetch_data(DataFetcher *df) +{ + PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df); + + if (fetcher->state.eof) + return 0; + + if (!fetcher->state.open) + prepared_statement_fetcher_send_fetch_request(df); + + return prepared_statement_fetcher_complete(fetcher); +} + +static void +prepared_statement_fetcher_store_tuple(DataFetcher *df, int row, TupleTableSlot *slot) +{ + PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df); + + ExecClearTuple(slot); + + if (row >= df->num_tuples) + { + if (df->eof || df->funcs->fetch_data(df) == 0) + { + return; + } + + row = 0; + Assert(row == df->next_tuple_idx); + } + + Assert(fetcher->batch_values != NULL); + Assert(fetcher->batch_nulls != NULL); + Assert(row >= 0 && row < df->num_tuples); + + const int nattrs = tuplefactory_get_nattrs(fetcher->state.tf); + slot->tts_values = &fetcher->batch_values[nattrs * row]; + slot->tts_isnull = &fetcher->batch_nulls[nattrs * row]; + ExecStoreVirtualTuple(slot); +} + +static void +prepared_statement_fetcher_store_next_tuple(DataFetcher *df, TupleTableSlot *slot) +{ + prepared_statement_fetcher_store_tuple(df, df->next_tuple_idx, slot); + + if (!TupIsNull(slot)) + df->next_tuple_idx++; + + Assert(df->next_tuple_idx 
<= df->num_tuples);
+}
+
+DataFetcher *
+prepared_statement_fetcher_create_for_scan(TSConnection *conn, const char *stmt, StmtParams *params,
+										   TupleFactory *tf)
+{
+	PreparedStatementFetcher *fetcher = palloc0(sizeof(PreparedStatementFetcher));
+
+	data_fetcher_init(&fetcher->state, conn, stmt, params, tf);
+	fetcher->state.type = PreparedStatementFetcherType;
+	fetcher->state.funcs = &funcs;
+
+	PGconn *pg_conn = remote_connection_get_pg_conn(conn);
+	if (remote_connection_get_status(conn) != CONN_IDLE)
+	{
+		elog(ERROR,
+			 "unexpected activity on data node connection when creating the prepared "
+			 "statement fetcher");
+	}
+
+	/*
+	 * Force using the generic plan for each execution of the data node query,
+	 * because it would be very expensive and pointless to replan it for each
+	 * subsequent parameter value.
+	 */
+	PGresult *res = remote_connection_exec(conn, "SET plan_cache_mode = 'force_generic_plan'");
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		TSConnectionError err;
+		remote_connection_get_result_error(res, &err);
+		remote_connection_error_elog(&err, ERROR);
+	}
+	PQclear(res);
+
+	if (1 != PQsendPrepare(pg_conn,
+						   /* stmtName = */ "",
+						   stmt,
+						   stmt_params_num_params(params),
+						   /* paramTypes = */ NULL))
+	{
+		TSConnectionError err;
+		remote_connection_get_error(conn, &err);
+		remote_connection_error_elog(&err, ERROR);
+	}
+
+	res = remote_connection_get_result(conn, TS_NO_TIMEOUT);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		TSConnectionError err;
+		remote_connection_get_result_error(res, &err);
+		remote_connection_error_elog(&err, ERROR);
+	}
+
+	PQclear(res);
+
+	return &fetcher->state;
+}
+
+static void
+prepared_statement_fetcher_close(DataFetcher *df)
+{
+	PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df);
+
+	if (fetcher->state.open)
+	{
+		if (fetcher->state.data_req != NULL)
+		{
+			fetcher->state.data_req = NULL;
+		}
+		prepared_statement_fetcher_reset(fetcher);
+	}
+	else
+	{
+		Assert(fetcher->state.data_req == NULL);
+		Assert(fetcher->state.num_tuples == 0);
+
+#ifdef USE_ASSERT_CHECKING
+		TSConnection *conn = fetcher->state.conn;
+		PGconn *pg_conn = remote_connection_get_pg_conn(conn);
+
+		Assert(PQtransactionStatus(pg_conn) != PQTRANS_ACTIVE);
+#endif
+	}
+
+	PGresult *res = remote_connection_exec(fetcher->state.conn, "RESET plan_cache_mode");
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		TSConnectionError err;
+		remote_connection_get_result_error(res, &err);
+		remote_connection_error_elog(&err, ERROR);
+	}
+	PQclear(res);
+}
+
+static void
+prepared_statement_fetcher_rewind(DataFetcher *df)
+{
+	PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df);
+
+	if (fetcher->state.batch_count > 1)
+		/* we're past the first batch, so reset the fetcher and restart from a clean state */
+		prepared_statement_fetcher_reset(fetcher);
+	else
+		/* we can reuse the current batch of results */
+		fetcher->state.next_tuple_idx = 0;
+}
+
+static void
+prepared_statement_fetcher_rescan(DataFetcher *df, StmtParams *params)
+{
+	PreparedStatementFetcher *fetcher = cast_fetcher(PreparedStatementFetcher, df);
+	prepared_statement_fetcher_reset(fetcher);
+	df->stmt_params = params;
+}
diff --git a/tsl/src/remote/prepared_statement_fetcher.h b/tsl/src/remote/prepared_statement_fetcher.h
new file mode 100644
index 000000000..eb78d5186
--- /dev/null
+++ b/tsl/src/remote/prepared_statement_fetcher.h
@@ -0,0 +1,17 @@
+/*
+ * This file and its contents are licensed under the Timescale License.
+ * Please see the included NOTICE for copyright information and
+ * LICENSE-TIMESCALE for a copy of the license.
+ */
+#ifndef TIMESCALEDB_TSL_PREPARED_STATEMENT_FETCHER_H
+#define TIMESCALEDB_TSL_PREPARED_STATEMENT_FETCHER_H
+
+#include <postgres.h>
+
+#include "data_fetcher.h"
+
+extern DataFetcher *prepared_statement_fetcher_create_for_scan(TSConnection *conn, const char *stmt,
+															   StmtParams *params,
+															   TupleFactory *tf);
+
+#endif /* TIMESCALEDB_TSL_PREPARED_STATEMENT_FETCHER_H */
diff --git a/tsl/test/expected/data_fetcher.out b/tsl/test/expected/data_fetcher.out
index a7973d5bf..2a1758216 100644
--- a/tsl/test/expected/data_fetcher.out
+++ b/tsl/test/expected/data_fetcher.out
@@ -8,9 +8,8 @@
 \set TEST_BASE_NAME data_fetcher
 SELECT format('include/%s_run.sql', :'TEST_BASE_NAME') as "TEST_QUERY_NAME",
        format('%s/results/%s_results_cursor.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_CURSOR",
-       format('%s/results/%s_results_copy.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_COPY"
-\gset
-SELECT format('\! diff %s %s', :'TEST_RESULTS_CURSOR', :'TEST_RESULTS_COPY') as "DIFF_CMD"
+       format('%s/results/%s_results_copy.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_COPY",
+       format('%s/results/%s_results_prepared.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_PREPARED"
 \gset
 SET ROLE :ROLE_CLUSTER_SUPERUSER;
 SELECT node_name, database, node_created, database_created, extension_created
@@ -104,6 +103,30 @@
 SELECT count(*), count(value) FROM one_batch;
 SELECT count(*), count(value) FROM one_batch_default;
 \o
 -- compare results
+SELECT format('\! diff %s %s', :'TEST_RESULTS_CURSOR', :'TEST_RESULTS_COPY') as "DIFF_CMD"
+\gset
+:DIFF_CMD
+-- run queries using prepared statement fetcher
+SET timescaledb.remote_data_fetcher = 'prepared';
+\o :TEST_RESULTS_PREPARED
+\ir :TEST_QUERY_NAME
+-- This file and its contents are licensed under the Timescale License.
+-- Please see the included NOTICE for copyright information and
+-- LICENSE-TIMESCALE for a copy of the license.
+ANALYZE disttable;
+SELECT count(*) FROM disttable;
+SELECT time_bucket('1 hour', time) AS time, device, avg(temp)
+FROM disttable
+GROUP BY 1,2
+ORDER BY 1,2;
+-- Test for #5323 - ensure that no NULL tuples are generated
+-- if the last element of the batch is the file trailer.
+SELECT count(*), count(value) FROM one_batch;
+SELECT count(*), count(value) FROM one_batch_default;
+\o
+-- compare results
+SELECT format('\! diff %s %s', :'TEST_RESULTS_CURSOR', :'TEST_RESULTS_PREPARED') as "DIFF_CMD"
+\gset
 :DIFF_CMD
 -- Test custom FDW settings. Instead of the tests above, we are not interersted
 -- in comparing the results of the fetchers. In the following tests we are
diff --git a/tsl/test/expected/dist_param.out b/tsl/test/expected/dist_param.out
index e67e234dc..d83e49839 100644
--- a/tsl/test/expected/dist_param.out
+++ b/tsl/test/expected/dist_param.out
@@ -19,6 +19,7 @@ grant usage on foreign server data_node_1 to public;
 grant create on schema public to :ROLE_1;
 set role :ROLE_1;
 reset client_min_messages;
+\set ON_ERROR_STOP 0
 -- helper function: float -> pseudorandom float [0..1].
create or replace function mix(x float4) returns float4 as $$ select ((hashfloat4(x) / (pow(2., 31) - 1) + 1) / 2)::float4 $$ language sql; -- distributed hypertable @@ -154,6 +155,122 @@ order by id (21 rows) reset timescaledb.enable_parameterized_data_node_scan; +-- All fetcher types with join +set timescaledb.remote_data_fetcher = 'copy'; +select id, max(value), count(*) +from metric_dist +where id in (select id from metric_name where name like 'cpu%') + and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03' +group by id +order by id +; +ERROR: cannot use COPY fetcher because the plan is parameterized +set timescaledb.remote_data_fetcher = 'cursor'; +select id, max(value), count(*) +from metric_dist +where id in (select id from metric_name where name like 'cpu%') + and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03' +group by id +order by id +; + id | max | count +----+------------------+------- + 1 | 49.9941974878311 | 4174 + 3 | 49.9958902597427 | 4119 + 7 | 49.9881327152252 | 4316 +(3 rows) + +set timescaledb.remote_data_fetcher = 'prepared'; +select id, max(value), count(*) +from metric_dist +where id in (select id from metric_name where name like 'cpu%') + and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03' +group by id +order by id +; + id | max | count +----+------------------+------- + 1 | 49.9941974878311 | 4174 + 3 | 49.9958902597427 | 4119 + 7 | 49.9881327152252 | 4316 +(3 rows) + +-- All fetcher types with initplan +set timescaledb.remote_data_fetcher = 'copy'; +select id, max(value), count(*) +from metric_dist +where id = any((select array_agg(id) from metric_name where name like 'cpu%')::int[]) + and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03' +group by id +order by id +; +ERROR: cannot use COPY fetcher because the plan is parameterized +set timescaledb.remote_data_fetcher = 'cursor'; +select id, max(value), count(*) +from metric_dist +where id = any((select array_agg(id) from metric_name where name like 'cpu%')::int[]) + and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03' +group by id +order by id +; + id | max | count +----+------------------+------- + 1 | 49.9941974878311 | 4174 + 3 | 49.9958902597427 | 4119 + 7 | 49.9881327152252 | 4316 +(3 rows) + +set timescaledb.remote_data_fetcher = 'prepared'; +select id, max(value), count(*) +from metric_dist +where id = any((select array_agg(id) from metric_name where name like 'cpu%')::int[]) + and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03' +group by id +order by id +; + id | max | count +----+------------------+------- + 1 | 49.9941974878311 | 4174 + 3 | 49.9958902597427 | 4119 + 7 | 49.9881327152252 | 4316 +(3 rows) + +-- Should prefer prepared statement data fetcher for these queries. 
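+-- For reference, the prepared statement fetcher corresponds roughly to the
+-- following hand-written sequence on the data node. This is an illustrative
+-- sketch: the statement name is made up (the fetcher actually uses the
+-- unnamed prepared statement of the extended query protocol), and the remote
+-- SQL is simplified.
+--
+--   SET plan_cache_mode = 'force_generic_plan';
+--   PREPARE scan (integer) AS SELECT id, value FROM metric_dist WHERE $1 = id;
+--   EXECUTE scan(1);  -- re-executed with new values on each rescan, no replanning
+--   RESET plan_cache_mode;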
+set timescaledb.remote_data_fetcher = 'auto'; +explain (analyze, verbose, costs off, timing off, summary off) +select id, max(value), count(*) +from metric_dist +where id in (select id from metric_name where name like 'cpu%') + and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03' +group by id +order by id +; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + GroupAggregate (actual rows=3 loops=1) + Output: metric_dist.id, max(metric_dist.value), count(*) + Group Key: metric_dist.id + -> Sort (actual rows=12609 loops=1) + Output: metric_dist.id, metric_dist.value + Sort Key: metric_dist.id + Sort Method: quicksort + -> Nested Loop (actual rows=12609 loops=1) + Output: metric_dist.id, metric_dist.value + -> Index Scan using metric_name_name on public.metric_name (actual rows=3 loops=1) + Output: metric_name.id, metric_name.name + Index Cond: ((metric_name.name >= 'cpu'::text) AND (metric_name.name < 'cpv'::text)) + Filter: (metric_name.name ~~ 'cpu%'::text) + -> Custom Scan (DataNodeScan) on public.metric_dist (actual rows=4203 loops=3) + Output: metric_dist.id, metric_dist.value + Data node: data_node_1 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_1_3_chunk, _dist_hyper_1_16_chunk, _dist_hyper_1_20_chunk, _dist_hyper_1_37_chunk, _dist_hyper_1_52_chunk + Remote SQL: SELECT id, value FROM public.metric_dist WHERE _timescaledb_internal.chunks_in(public.metric_dist.*, ARRAY[3, 16, 20, 37, 52]) AND ((ts >= '2022-02-01 15:02:02-08'::timestamp with time zone)) AND ((ts <= '2022-03-02 15:02:02-08'::timestamp with time zone)) AND (($1::integer = id)) +(19 rows) + +-- Should reset the prepared cache mode after using the prepared statement fetcher. +call distributed_exec('create or replace procedure assert_auto_plan_cache_mode() as $$ begin assert (select setting from pg_settings where name = ''plan_cache_mode'') = ''auto''; end; $$ language plpgsql;'); +call distributed_exec('call assert_auto_plan_cache_mode();'); -- Shippable EC join select name, max(value), count(*) from metric_dist join metric_name using (id) diff --git a/tsl/test/expected/dist_remote_error-12.out b/tsl/test/expected/dist_remote_error-12.out new file mode 120000 index 000000000..a5ad1b0b8 --- /dev/null +++ b/tsl/test/expected/dist_remote_error-12.out @@ -0,0 +1 @@ +dist_remote_error-14.out \ No newline at end of file diff --git a/tsl/test/expected/dist_remote_error-13.out b/tsl/test/expected/dist_remote_error-13.out new file mode 120000 index 000000000..a5ad1b0b8 --- /dev/null +++ b/tsl/test/expected/dist_remote_error-13.out @@ -0,0 +1 @@ +dist_remote_error-14.out \ No newline at end of file diff --git a/tsl/test/expected/dist_remote_error-14.out b/tsl/test/expected/dist_remote_error-14.out new file mode 100644 index 000000000..de42836df --- /dev/null +++ b/tsl/test/expected/dist_remote_error-14.out @@ -0,0 +1,430 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +--\set DATA_NODE_1 data_node_1 +--\set DATA_NODE_2 data_node_2 +--\set DATA_NODE_3 data_node_3 +-- Set up the data nodes. 
+\set DATA_NODE_1 :TEST_DBNAME _1
+\set DATA_NODE_2 :TEST_DBNAME _2
+\set DATA_NODE_3 :TEST_DBNAME _3
+\c :TEST_DBNAME :ROLE_SUPERUSER
+SELECT node_name, database, node_created, database_created, extension_created
+FROM (
+  SELECT (add_data_node(name, host => 'localhost', DATABASE => name)).*
+  FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v(name)
+) a;
+       node_name        |        database        | node_created | database_created | extension_created
+------------------------+------------------------+--------------+------------------+-------------------
+ db_dist_remote_error_1 | db_dist_remote_error_1 | t            | t                | t
+ db_dist_remote_error_2 | db_dist_remote_error_2 | t            | t                | t
+ db_dist_remote_error_3 | db_dist_remote_error_3 | t            | t                | t
+(3 rows)
+
+GRANT USAGE ON FOREIGN SERVER :DATA_NODE_1, :DATA_NODE_2, :DATA_NODE_3 TO PUBLIC;
+GRANT CREATE ON SCHEMA public TO :ROLE_1;
+-- Import setup file to data nodes.
+\unset ECHO
+-- Disable SSL to get stable error output across versions. SSL adds some output
+-- that changed in PG 14.
+set timescaledb.debug_enable_ssl to off;
+set client_min_messages to error;
+SET timescaledb.hide_data_node_name_in_errors = 'on';
+-- A relatively big table on one data node
+CREATE TABLE metrics_dist_remote_error(filler_1 int, filler_2 int, filler_3 int, time timestamptz NOT NULL, device_id int, v0 int, v1 int, v2 float, v3 float);
+SELECT create_distributed_hypertable('metrics_dist_remote_error','time','device_id',3,
+    data_nodes => ARRAY[:'DATA_NODE_1']);
+     create_distributed_hypertable      
+----------------------------------------
+ (1,public,metrics_dist_remote_error,t)
+(1 row)
+
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_1;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-05 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_2;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-06 0:00:00+0'::timestamptz,'2000-01-12 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_3;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-13 0:00:00+0'::timestamptz,'2000-01-19 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ANALYZE metrics_dist_remote_error;
+-- The error messages vary wildly between Postgres versions, depending on
+-- the particular behavior of libpq in each case. The purpose of this
+-- test is not to solidify this accidental behavior, but to merely exercise the
+-- error handling code to make sure it doesn't have fatal errors. Unfortunately,
+-- there is no way to suppress error output from a psql script.
+set client_min_messages to ERROR; +\set ON_ERROR_STOP off +set timescaledb.remote_data_fetcher = 'copy'; +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 0 rows, 0 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(16384, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 16384 rows, 16384 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=22799 loops=1) + Output: 1 + Data node: db_dist_remote_error_1 + Fetcher Type: COPY + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk, _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk, _dist_hyper_1_9_chunk + Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) +(6 rows) + +-- We don't test fatal errors here, because PG versions before 14 are unable to +-- report them properly to the access node, so we get different errors in these +-- versions. +-- Now test the same with the cursor fetcher. 
+set timescaledb.remote_data_fetcher = 'cursor'; +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 0 rows, 0 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=22799 loops=1) + Output: 1 + Data node: db_dist_remote_error_1 + Fetcher Type: Cursor + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk, _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk, _dist_hyper_1_9_chunk + Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) +(6 rows) + +-- Now test the same with the prepared statement fetcher. 
+set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 0 rows, 0 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=22799 loops=1) + Output: 1 + Data node: db_dist_remote_error_1 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk, _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk, _dist_hyper_1_9_chunk + Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) +(6 rows) + +reset timescaledb.remote_data_fetcher; +-- Table with broken send for a data type. +create table metrics_dist_bs(like metrics_dist_remote_error); +alter table metrics_dist_bs alter column v0 type bs; +select table_name from create_distributed_hypertable('metrics_dist_bs', + 'time', 'device_id'); + table_name +----------------- + metrics_dist_bs +(1 row) + +set timescaledb.enable_connection_binary_data to off; +insert into metrics_dist_bs + select * from metrics_dist_remote_error; +set timescaledb.enable_connection_binary_data to on; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_bs; +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +drop table metrics_dist_bs; +-- Table with broken receive for a data type. 
+create table metrics_dist_br(like metrics_dist_remote_error); +alter table metrics_dist_br alter column v0 type br; +select table_name from create_distributed_hypertable('metrics_dist_br', + 'time', 'device_id'); + table_name +----------------- + metrics_dist_br +(1 row) + +select hypertable_name, replication_factor from timescaledb_information.hypertables +where hypertable_name = 'metrics_dist_br'; + hypertable_name | replication_factor +-----------------+-------------------- + metrics_dist_br | 1 +(1 row) + +-- Test that INSERT and COPY fail on data nodes. +-- Note that we use the text format for the COPY input, so that the access node +-- doesn't call `recv` and fail by itself. It's going to use binary format for +-- transfer to data nodes regardless of the input format. +set timescaledb.dist_copy_transfer_format = 'binary'; +-- First, create the reference. +\copy (select * from metrics_dist_remote_error) to 'dist_remote_error.text' with (format text); +-- We have to test various interleavings of COPY and INSERT to check that +-- one can recover from connection failure states introduced by another. +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +insert into metrics_dist_br select * from metrics_dist_remote_error; +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +insert into metrics_dist_br select * from metrics_dist_remote_error; +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +-- Fail at different points +set timescaledb.debug_broken_sendrecv_error_after = 1; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 2; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 1023; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 1024; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 1025; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen +reset timescaledb.debug_broken_sendrecv_error_after; +-- Same with different replication factor +truncate metrics_dist_br; +select set_replication_factor('metrics_dist_br', 2); + set_replication_factor +------------------------ + +(1 row) + +select hypertable_name, replication_factor from timescaledb_information.hypertables +where hypertable_name = 'metrics_dist_br'; + hypertable_name | replication_factor +-----------------+-------------------- + metrics_dist_br | 2 +(1 row) + +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out 
after 7103 rows, 7103 rows seen
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen
+insert into metrics_dist_br select * from metrics_dist_remote_error;
+ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen
+insert into metrics_dist_br select * from metrics_dist_remote_error;
+ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 2;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1023;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1024;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1025;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen
+-- Should succeed with text format for data transfer.
+set timescaledb.dist_copy_transfer_format = 'text';
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+-- Final check.
+set timescaledb.enable_connection_binary_data = false;
+select count(*) from metrics_dist_br;
+ count
+-------
+ 22800
+(1 row)
+
+set timescaledb.enable_connection_binary_data = true;
+reset timescaledb.debug_broken_sendrecv_error_after;
+drop table metrics_dist_br;
+-- Table with sleepy receive for a data type, to improve coverage of the waiting
+-- code on the access node.
+create table metrics_dist_sr(like metrics_dist_remote_error);
+alter table metrics_dist_sr alter column v0 type sr;
+select table_name from create_distributed_hypertable('metrics_dist_sr',
+    'time', 'device_id');
+ table_name
+-----------------
+ metrics_dist_sr
+(1 row)
+
+-- We're using a sleepy recv function, so we need the binary transfer format for it
+-- to be called on the data nodes.
+set timescaledb.dist_copy_transfer_format = 'binary';
+-- Test INSERT and COPY with slow data node.
+\copy metrics_dist_sr from 'dist_remote_error.text' with (format text);
+insert into metrics_dist_sr select * from metrics_dist_remote_error;
+select count(*) from metrics_dist_sr;
+ count
+-------
+ 45600
+(1 row)
+
+drop table metrics_dist_sr;
+-- Table with sleepy send for a data type, on one data node, to improve coverage
+-- of waiting in data fetchers.
+create table metrics_dist_ss(like metrics_dist_remote_error);
+alter table metrics_dist_ss alter column v0 type ss;
+select table_name from create_distributed_hypertable('metrics_dist_ss',
+    'time', 'device_id');
+ table_name
+-----------------
+ metrics_dist_ss
+(1 row)
+
+-- Populate the table, using text COPY to avoid the sleepy stuff.
+set timescaledb.dist_copy_transfer_format = 'text';
+\copy metrics_dist_ss from 'dist_remote_error.text' with (format text);
+-- We're using a sleepy send function, so we need the binary transfer format for it
+-- to be called on the data nodes.
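+-- For reference, the general shape of such a type definition (a sketch, not the
+-- verbatim setup file: all four I/O functions here reuse the int4 built-ins,
+-- whereas the real 'ss' type swaps in a C send function that sleeps per row):
+--   create type ss;
+--   create function ss_in(cstring) returns ss as 'int4in' language internal strict immutable;
+--   create function ss_out(ss) returns cstring as 'int4out' language internal strict immutable;
+--   create function ss_recv(internal) returns ss as 'int4recv' language internal strict immutable;
+--   create function ss_send(ss) returns bytea as 'int4send' language internal strict immutable;
+--   create type ss (input = ss_in, output = ss_out, receive = ss_recv, send = ss_send,
+--       internallength = 4, passedbyvalue);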
+set timescaledb.enable_connection_binary_data = true; +set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_ss; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) (actual rows=22800 loops=1) + Output: metrics_dist_ss."time", metrics_dist_ss.device_id, metrics_dist_ss.v0, metrics_dist_ss.v1, metrics_dist_ss.v2, metrics_dist_ss.v3 + -> Append (actual rows=22800 loops=1) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_1 (actual rows=4560 loops=1) + Output: metrics_dist_ss_1."time", metrics_dist_ss_1.device_id, metrics_dist_ss_1.v0, metrics_dist_ss_1.v1, metrics_dist_ss_1.v2, metrics_dist_ss_1.v3 + Data node: db_dist_remote_error_1 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_5_96_chunk, _dist_hyper_5_99_chunk, _dist_hyper_5_102_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[54, 55, 56]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_2 (actual rows=13680 loops=1) + Output: metrics_dist_ss_2."time", metrics_dist_ss_2.device_id, metrics_dist_ss_2.v0, metrics_dist_ss_2.v1, metrics_dist_ss_2.v2, metrics_dist_ss_2.v3 + Data node: db_dist_remote_error_2 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_5_97_chunk, _dist_hyper_5_100_chunk, _dist_hyper_5_103_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[48, 49, 50]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_3 (actual rows=4560 loops=1) + Output: metrics_dist_ss_3."time", metrics_dist_ss_3.device_id, metrics_dist_ss_3.v0, metrics_dist_ss_3.v1, metrics_dist_ss_3.v2, metrics_dist_ss_3.v3 + Data node: db_dist_remote_error_3 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_5_98_chunk, _dist_hyper_5_101_chunk, _dist_hyper_5_104_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[32, 33, 34]) +(21 rows) + +set timescaledb.remote_data_fetcher = 'copy'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_ss; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) (actual rows=22800 loops=1) + Output: metrics_dist_ss."time", metrics_dist_ss.device_id, metrics_dist_ss.v0, metrics_dist_ss.v1, metrics_dist_ss.v2, metrics_dist_ss.v3 + -> Append (actual rows=22800 loops=1) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_1 (actual rows=4560 loops=1) + Output: metrics_dist_ss_1."time", metrics_dist_ss_1.device_id, metrics_dist_ss_1.v0, metrics_dist_ss_1.v1, metrics_dist_ss_1.v2, metrics_dist_ss_1.v3 + Data node: db_dist_remote_error_1 + Fetcher Type: COPY + Chunks: _dist_hyper_5_96_chunk, _dist_hyper_5_99_chunk, _dist_hyper_5_102_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[54, 55, 56]) + -> Custom Scan (DataNodeScan) on 
public.metrics_dist_ss metrics_dist_ss_2 (actual rows=13680 loops=1) + Output: metrics_dist_ss_2."time", metrics_dist_ss_2.device_id, metrics_dist_ss_2.v0, metrics_dist_ss_2.v1, metrics_dist_ss_2.v2, metrics_dist_ss_2.v3 + Data node: db_dist_remote_error_2 + Fetcher Type: COPY + Chunks: _dist_hyper_5_97_chunk, _dist_hyper_5_100_chunk, _dist_hyper_5_103_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[48, 49, 50]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_3 (actual rows=4560 loops=1) + Output: metrics_dist_ss_3."time", metrics_dist_ss_3.device_id, metrics_dist_ss_3.v0, metrics_dist_ss_3.v1, metrics_dist_ss_3.v2, metrics_dist_ss_3.v3 + Data node: db_dist_remote_error_3 + Fetcher Type: COPY + Chunks: _dist_hyper_5_98_chunk, _dist_hyper_5_101_chunk, _dist_hyper_5_104_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[32, 33, 34]) +(21 rows) + +set timescaledb.remote_data_fetcher = 'cursor'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_ss; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) (actual rows=22800 loops=1) + Output: metrics_dist_ss."time", metrics_dist_ss.device_id, metrics_dist_ss.v0, metrics_dist_ss.v1, metrics_dist_ss.v2, metrics_dist_ss.v3 + -> Append (actual rows=22800 loops=1) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_1 (actual rows=4560 loops=1) + Output: metrics_dist_ss_1."time", metrics_dist_ss_1.device_id, metrics_dist_ss_1.v0, metrics_dist_ss_1.v1, metrics_dist_ss_1.v2, metrics_dist_ss_1.v3 + Data node: db_dist_remote_error_1 + Fetcher Type: Cursor + Chunks: _dist_hyper_5_96_chunk, _dist_hyper_5_99_chunk, _dist_hyper_5_102_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[54, 55, 56]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_2 (actual rows=13680 loops=1) + Output: metrics_dist_ss_2."time", metrics_dist_ss_2.device_id, metrics_dist_ss_2.v0, metrics_dist_ss_2.v1, metrics_dist_ss_2.v2, metrics_dist_ss_2.v3 + Data node: db_dist_remote_error_2 + Fetcher Type: Cursor + Chunks: _dist_hyper_5_97_chunk, _dist_hyper_5_100_chunk, _dist_hyper_5_103_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[48, 49, 50]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_3 (actual rows=4560 loops=1) + Output: metrics_dist_ss_3."time", metrics_dist_ss_3.device_id, metrics_dist_ss_3.v0, metrics_dist_ss_3.v1, metrics_dist_ss_3.v2, metrics_dist_ss_3.v3 + Data node: db_dist_remote_error_3 + Fetcher Type: Cursor + Chunks: _dist_hyper_5_98_chunk, _dist_hyper_5_101_chunk, _dist_hyper_5_104_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[32, 33, 34]) +(21 rows) + +-- Incorrect int output, to cover the error handling in tuplefactory. 
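+-- For context: the broken output function renders integer values as text that
+-- cannot be parsed back, so the failures below are the same error a plain cast
+-- of such text would produce:
+--   select 'surprise'::int;
+--   ERROR: invalid input syntax for type integer: "surprise"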
+create table metrics_dist_io(like metrics_dist_remote_error);
+alter table metrics_dist_io alter column v0 type io;
+select table_name from create_distributed_hypertable('metrics_dist_io',
+    'time', 'device_id');
+ table_name
+-----------------
+ metrics_dist_io
+(1 row)
+
+-- Populate the table, using binary COPY to avoid the broken int4out.
+set timescaledb.enable_connection_binary_data = true;
+set timescaledb.dist_copy_transfer_format = 'binary';
+\copy metrics_dist_io from 'dist_remote_error.text' with (format text);
+-- For testing, force the text format to exercise our broken output function.
+set timescaledb.enable_connection_binary_data = false;
+set timescaledb.dist_copy_transfer_format = 'text';
+set timescaledb.remote_data_fetcher = 'prepared';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_io;
+ERROR: invalid input syntax for type integer: "surprise"
+set timescaledb.remote_data_fetcher = 'copy';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_io;
+ERROR: cannot use COPY fetcher because some of the column types do not have binary serialization
+set timescaledb.remote_data_fetcher = 'cursor';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_io;
+ERROR: invalid input syntax for type integer: "surprise"
+-- cleanup
+\c :TEST_DBNAME :ROLE_SUPERUSER;
+DROP DATABASE :DATA_NODE_1;
+DROP DATABASE :DATA_NODE_2;
+DROP DATABASE :DATA_NODE_3;
diff --git a/tsl/test/expected/dist_remote_error-15.out b/tsl/test/expected/dist_remote_error-15.out
new file mode 100644
index 000000000..a434714c6
--- /dev/null
+++ b/tsl/test/expected/dist_remote_error-15.out
@@ -0,0 +1,433 @@
+-- This file and its contents are licensed under the Timescale License.
+-- Please see the included NOTICE for copyright information and
+-- LICENSE-TIMESCALE for a copy of the license.
+--\set DATA_NODE_1 data_node_1
+--\set DATA_NODE_2 data_node_2
+--\set DATA_NODE_3 data_node_3
+-- Set up the data nodes.
+\set DATA_NODE_1 :TEST_DBNAME _1
+\set DATA_NODE_2 :TEST_DBNAME _2
+\set DATA_NODE_3 :TEST_DBNAME _3
+\c :TEST_DBNAME :ROLE_SUPERUSER
+SELECT node_name, database, node_created, database_created, extension_created
+FROM (
+  SELECT (add_data_node(name, host => 'localhost', DATABASE => name)).*
+  FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v(name)
+) a;
+ node_name | database | node_created | database_created | extension_created
+------------------------+------------------------+--------------+------------------+-------------------
+ db_dist_remote_error_1 | db_dist_remote_error_1 | t | t | t
+ db_dist_remote_error_2 | db_dist_remote_error_2 | t | t | t
+ db_dist_remote_error_3 | db_dist_remote_error_3 | t | t | t
+(3 rows)
+
+GRANT USAGE ON FOREIGN SERVER :DATA_NODE_1, :DATA_NODE_2, :DATA_NODE_3 TO PUBLIC;
+GRANT CREATE ON SCHEMA public TO :ROLE_1;
+-- Import setup file to data nodes.
+\unset ECHO
+-- Disable SSL to get stable error output across versions. SSL adds some output
+-- that changed in PG 14.
+set timescaledb.debug_enable_ssl to off;
+set client_min_messages to error;
+SET timescaledb.hide_data_node_name_in_errors = 'on';
+-- A relatively big table on one data node
+CREATE TABLE metrics_dist_remote_error(filler_1 int, filler_2 int, filler_3 int, time timestamptz NOT NULL, device_id int, v0 int, v1 int, v2 float, v3 float);
+SELECT create_distributed_hypertable('metrics_dist_remote_error','time','device_id',3,
+    data_nodes => ARRAY[:'DATA_NODE_1']);
+ create_distributed_hypertable
+----------------------------------------
+ (1,public,metrics_dist_remote_error,t)
+(1 row)
+
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_1;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-05 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_2;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-06 0:00:00+0'::timestamptz,'2000-01-12 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_3;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-13 0:00:00+0'::timestamptz,'2000-01-19 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ANALYZE metrics_dist_remote_error;
+-- The error messages vary wildly between the Postgres versions, depending on
+-- the particular behavior of libpq in this or that case. The purpose of this
+-- test is not to solidify this accidental behavior, but to merely exercise the
+-- error handling code to make sure it doesn't have fatal errors. Unfortunately,
+-- there is no way to suppress error output from a psql script.
+set client_min_messages to ERROR; +\set ON_ERROR_STOP off +set timescaledb.remote_data_fetcher = 'copy'; +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 0 rows, 0 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(16384, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 16384 rows, 16384 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Result (actual rows=22799 loops=1) + Output: 1 + -> Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=22799 loops=1) + Data node: db_dist_remote_error_1 + Fetcher Type: COPY + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk, _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk, _dist_hyper_1_9_chunk + Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) +(7 rows) + +-- We don't test fatal errors here, because PG versions before 14 are unable to +-- report them properly to the access node, so we get different errors in these +-- versions. +-- Now test the same with the cursor fetcher. 
+set timescaledb.remote_data_fetcher = 'cursor'; +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 0 rows, 0 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Result (actual rows=22799 loops=1) + Output: 1 + -> Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=22799 loops=1) + Data node: db_dist_remote_error_1 + Fetcher Type: Cursor + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk, _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk, _dist_hyper_1_9_chunk + Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) +(7 rows) + +-- Now test the same with the prepared statement fetcher. 
+set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 0 rows, 0 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; +ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Result (actual rows=22799 loops=1) + Output: 1 + -> Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=22799 loops=1) + Data node: db_dist_remote_error_1 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_1_1_chunk, _dist_hyper_1_2_chunk, _dist_hyper_1_3_chunk, _dist_hyper_1_4_chunk, _dist_hyper_1_5_chunk, _dist_hyper_1_6_chunk, _dist_hyper_1_7_chunk, _dist_hyper_1_8_chunk, _dist_hyper_1_9_chunk + Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) +(7 rows) + +reset timescaledb.remote_data_fetcher; +-- Table with broken send for a data type. +create table metrics_dist_bs(like metrics_dist_remote_error); +alter table metrics_dist_bs alter column v0 type bs; +select table_name from create_distributed_hypertable('metrics_dist_bs', + 'time', 'device_id'); + table_name +----------------- + metrics_dist_bs +(1 row) + +set timescaledb.enable_connection_binary_data to off; +insert into metrics_dist_bs + select * from metrics_dist_remote_error; +set timescaledb.enable_connection_binary_data to on; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_bs; +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +drop table metrics_dist_bs; +-- Table with broken receive for a data type. 
+create table metrics_dist_br(like metrics_dist_remote_error); +alter table metrics_dist_br alter column v0 type br; +select table_name from create_distributed_hypertable('metrics_dist_br', + 'time', 'device_id'); + table_name +----------------- + metrics_dist_br +(1 row) + +select hypertable_name, replication_factor from timescaledb_information.hypertables +where hypertable_name = 'metrics_dist_br'; + hypertable_name | replication_factor +-----------------+-------------------- + metrics_dist_br | 1 +(1 row) + +-- Test that INSERT and COPY fail on data nodes. +-- Note that we use the text format for the COPY input, so that the access node +-- doesn't call `recv` and fail by itself. It's going to use binary format for +-- transfer to data nodes regardless of the input format. +set timescaledb.dist_copy_transfer_format = 'binary'; +-- First, create the reference. +\copy (select * from metrics_dist_remote_error) to 'dist_remote_error.text' with (format text); +-- We have to test various interleavings of COPY and INSERT to check that +-- one can recover from connection failure states introduced by another. +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +insert into metrics_dist_br select * from metrics_dist_remote_error; +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +insert into metrics_dist_br select * from metrics_dist_remote_error; +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen +-- Fail at different points +set timescaledb.debug_broken_sendrecv_error_after = 1; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 2; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 1023; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 1024; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen +set timescaledb.debug_broken_sendrecv_error_after = 1025; +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen +reset timescaledb.debug_broken_sendrecv_error_after; +-- Same with different replication factor +truncate metrics_dist_br; +select set_replication_factor('metrics_dist_br', 2); + set_replication_factor +------------------------ + +(1 row) + +select hypertable_name, replication_factor from timescaledb_information.hypertables +where hypertable_name = 'metrics_dist_br'; + hypertable_name | replication_factor +-----------------+-------------------- + metrics_dist_br | 2 +(1 row) + +\copy metrics_dist_br from 'dist_remote_error.text' with (format text); +ERROR: []: debug point: requested to error out 
after 7103 rows, 7103 rows seen
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen
+insert into metrics_dist_br select * from metrics_dist_remote_error;
+ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen
+insert into metrics_dist_br select * from metrics_dist_remote_error;
+ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 2;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1023;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1024;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen
+set timescaledb.debug_broken_sendrecv_error_after = 1025;
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen
+-- Should succeed with text format for data transfer.
+set timescaledb.dist_copy_transfer_format = 'text';
+\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
+-- Final check.
+set timescaledb.enable_connection_binary_data = false;
+select count(*) from metrics_dist_br;
+ count
+-------
+ 22800
+(1 row)
+
+set timescaledb.enable_connection_binary_data = true;
+reset timescaledb.debug_broken_sendrecv_error_after;
+drop table metrics_dist_br;
+-- Table with sleepy receive for a data type, to improve coverage of the waiting
+-- code on the access node.
+create table metrics_dist_sr(like metrics_dist_remote_error);
+alter table metrics_dist_sr alter column v0 type sr;
+select table_name from create_distributed_hypertable('metrics_dist_sr',
+    'time', 'device_id');
+ table_name
+-----------------
+ metrics_dist_sr
+(1 row)
+
+-- We're using a sleepy recv function, so we need the binary transfer format for it
+-- to be called on the data nodes.
+set timescaledb.dist_copy_transfer_format = 'binary';
+-- Test INSERT and COPY with slow data node.
+\copy metrics_dist_sr from 'dist_remote_error.text' with (format text);
+insert into metrics_dist_sr select * from metrics_dist_remote_error;
+select count(*) from metrics_dist_sr;
+ count
+-------
+ 45600
+(1 row)
+
+drop table metrics_dist_sr;
+-- Table with sleepy send for a data type, on one data node, to improve coverage
+-- of waiting in data fetchers.
+create table metrics_dist_ss(like metrics_dist_remote_error);
+alter table metrics_dist_ss alter column v0 type ss;
+select table_name from create_distributed_hypertable('metrics_dist_ss',
+    'time', 'device_id');
+ table_name
+-----------------
+ metrics_dist_ss
+(1 row)
+
+-- Populate the table, using text COPY to avoid the sleepy stuff.
+set timescaledb.dist_copy_transfer_format = 'text';
+\copy metrics_dist_ss from 'dist_remote_error.text' with (format text);
+-- We're using a sleepy send function, so we need the binary transfer format for it
+-- to be called on the data nodes.
+set timescaledb.enable_connection_binary_data = true; +set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_ss; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) (actual rows=22800 loops=1) + Output: metrics_dist_ss."time", metrics_dist_ss.device_id, metrics_dist_ss.v0, metrics_dist_ss.v1, metrics_dist_ss.v2, metrics_dist_ss.v3 + -> Append (actual rows=22800 loops=1) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_1 (actual rows=4560 loops=1) + Output: metrics_dist_ss_1."time", metrics_dist_ss_1.device_id, metrics_dist_ss_1.v0, metrics_dist_ss_1.v1, metrics_dist_ss_1.v2, metrics_dist_ss_1.v3 + Data node: db_dist_remote_error_1 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_5_96_chunk, _dist_hyper_5_99_chunk, _dist_hyper_5_102_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[54, 55, 56]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_2 (actual rows=13680 loops=1) + Output: metrics_dist_ss_2."time", metrics_dist_ss_2.device_id, metrics_dist_ss_2.v0, metrics_dist_ss_2.v1, metrics_dist_ss_2.v2, metrics_dist_ss_2.v3 + Data node: db_dist_remote_error_2 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_5_97_chunk, _dist_hyper_5_100_chunk, _dist_hyper_5_103_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[48, 49, 50]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_3 (actual rows=4560 loops=1) + Output: metrics_dist_ss_3."time", metrics_dist_ss_3.device_id, metrics_dist_ss_3.v0, metrics_dist_ss_3.v1, metrics_dist_ss_3.v2, metrics_dist_ss_3.v3 + Data node: db_dist_remote_error_3 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_5_98_chunk, _dist_hyper_5_101_chunk, _dist_hyper_5_104_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[32, 33, 34]) +(21 rows) + +set timescaledb.remote_data_fetcher = 'copy'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_ss; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) (actual rows=22800 loops=1) + Output: metrics_dist_ss."time", metrics_dist_ss.device_id, metrics_dist_ss.v0, metrics_dist_ss.v1, metrics_dist_ss.v2, metrics_dist_ss.v3 + -> Append (actual rows=22800 loops=1) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_1 (actual rows=4560 loops=1) + Output: metrics_dist_ss_1."time", metrics_dist_ss_1.device_id, metrics_dist_ss_1.v0, metrics_dist_ss_1.v1, metrics_dist_ss_1.v2, metrics_dist_ss_1.v3 + Data node: db_dist_remote_error_1 + Fetcher Type: COPY + Chunks: _dist_hyper_5_96_chunk, _dist_hyper_5_99_chunk, _dist_hyper_5_102_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[54, 55, 56]) + -> Custom Scan (DataNodeScan) on 
public.metrics_dist_ss metrics_dist_ss_2 (actual rows=13680 loops=1) + Output: metrics_dist_ss_2."time", metrics_dist_ss_2.device_id, metrics_dist_ss_2.v0, metrics_dist_ss_2.v1, metrics_dist_ss_2.v2, metrics_dist_ss_2.v3 + Data node: db_dist_remote_error_2 + Fetcher Type: COPY + Chunks: _dist_hyper_5_97_chunk, _dist_hyper_5_100_chunk, _dist_hyper_5_103_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[48, 49, 50]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_3 (actual rows=4560 loops=1) + Output: metrics_dist_ss_3."time", metrics_dist_ss_3.device_id, metrics_dist_ss_3.v0, metrics_dist_ss_3.v1, metrics_dist_ss_3.v2, metrics_dist_ss_3.v3 + Data node: db_dist_remote_error_3 + Fetcher Type: COPY + Chunks: _dist_hyper_5_98_chunk, _dist_hyper_5_101_chunk, _dist_hyper_5_104_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[32, 33, 34]) +(21 rows) + +set timescaledb.remote_data_fetcher = 'cursor'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_ss; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (AsyncAppend) (actual rows=22800 loops=1) + Output: metrics_dist_ss."time", metrics_dist_ss.device_id, metrics_dist_ss.v0, metrics_dist_ss.v1, metrics_dist_ss.v2, metrics_dist_ss.v3 + -> Append (actual rows=22800 loops=1) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_1 (actual rows=4560 loops=1) + Output: metrics_dist_ss_1."time", metrics_dist_ss_1.device_id, metrics_dist_ss_1.v0, metrics_dist_ss_1.v1, metrics_dist_ss_1.v2, metrics_dist_ss_1.v3 + Data node: db_dist_remote_error_1 + Fetcher Type: Cursor + Chunks: _dist_hyper_5_96_chunk, _dist_hyper_5_99_chunk, _dist_hyper_5_102_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[54, 55, 56]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_2 (actual rows=13680 loops=1) + Output: metrics_dist_ss_2."time", metrics_dist_ss_2.device_id, metrics_dist_ss_2.v0, metrics_dist_ss_2.v1, metrics_dist_ss_2.v2, metrics_dist_ss_2.v3 + Data node: db_dist_remote_error_2 + Fetcher Type: Cursor + Chunks: _dist_hyper_5_97_chunk, _dist_hyper_5_100_chunk, _dist_hyper_5_103_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[48, 49, 50]) + -> Custom Scan (DataNodeScan) on public.metrics_dist_ss metrics_dist_ss_3 (actual rows=4560 loops=1) + Output: metrics_dist_ss_3."time", metrics_dist_ss_3.device_id, metrics_dist_ss_3.v0, metrics_dist_ss_3.v1, metrics_dist_ss_3.v2, metrics_dist_ss_3.v3 + Data node: db_dist_remote_error_3 + Fetcher Type: Cursor + Chunks: _dist_hyper_5_98_chunk, _dist_hyper_5_101_chunk, _dist_hyper_5_104_chunk + Remote SQL: SELECT "time", device_id, v0, v1, v2, v3 FROM public.metrics_dist_ss WHERE _timescaledb_internal.chunks_in(public.metrics_dist_ss.*, ARRAY[32, 33, 34]) +(21 rows) + +-- Incorrect int output, to cover the error handling in tuplefactory. 
+create table metrics_dist_io(like metrics_dist_remote_error);
+alter table metrics_dist_io alter column v0 type io;
+select table_name from create_distributed_hypertable('metrics_dist_io',
+    'time', 'device_id');
+ table_name
+-----------------
+ metrics_dist_io
+(1 row)
+
+-- Populate the table, using binary COPY to avoid the broken int4out.
+set timescaledb.enable_connection_binary_data = true;
+set timescaledb.dist_copy_transfer_format = 'binary';
+\copy metrics_dist_io from 'dist_remote_error.text' with (format text);
+-- For testing, force the text format to exercise our broken output function.
+set timescaledb.enable_connection_binary_data = false;
+set timescaledb.dist_copy_transfer_format = 'text';
+set timescaledb.remote_data_fetcher = 'prepared';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_io;
+ERROR: invalid input syntax for type integer: "surprise"
+set timescaledb.remote_data_fetcher = 'copy';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_io;
+ERROR: cannot use COPY fetcher because some of the column types do not have binary serialization
+set timescaledb.remote_data_fetcher = 'cursor';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_io;
+ERROR: invalid input syntax for type integer: "surprise"
+-- cleanup
+\c :TEST_DBNAME :ROLE_SUPERUSER;
+DROP DATABASE :DATA_NODE_1;
+DROP DATABASE :DATA_NODE_2;
+DROP DATABASE :DATA_NODE_3;
diff --git a/tsl/test/expected/remote_connection.out b/tsl/test/expected/remote_connection.out
index ac86e3d88..00436d31f 100644
--- a/tsl/test/expected/remote_connection.out
+++ b/tsl/test/expected/remote_connection.out
@@ -60,7 +60,7 @@ LOCATION: ts_test_bad_remote_query, connection.c:216
 SELECT * FROM test.get_connection_stats();
 connections_created | connections_closed | results_created | results_cleared
---------------------+--------------------+-----------------+-----------------
- 1 | 1 | 8 | 8
+ 1 | 1 | 9 | 9
 (1 row)
 
 SELECT test.remote_connection_tests();
diff --git a/tsl/test/shared/expected/dist_fetcher_type-12.out b/tsl/test/shared/expected/dist_fetcher_type-12.out
index c20f3624c..593cafa7a 100644
--- a/tsl/test/shared/expected/dist_fetcher_type-12.out
+++ b/tsl/test/shared/expected/dist_fetcher_type-12.out
@@ -66,12 +66,17 @@ QUERY PLAN
 Remote SQL: SELECT id FROM public.distinct_on_distributed WHERE _timescaledb_internal.chunks_in(public.distinct_on_distributed.*, ARRAY[..])
 (19 rows)
 
--- This query can't work with copy fetcher.
+-- This query can't work with copy or prepared fetcher.
 set timescaledb.remote_data_fetcher = 'copy';
 select 1 x from distinct_on_distributed t1, distinct_on_distributed t2
 where t1.id = t2.id + 1
 limit 1;
-ERROR: COPY fetcher not supported
+ERROR: only cursor fetcher is supported for this query
+set timescaledb.remote_data_fetcher = 'prepared';
+select 1 x from distinct_on_distributed t1, distinct_on_distributed t2
+where t1.id = t2.id + 1
+limit 1;
+ERROR: only cursor fetcher is supported for this query
 -- Check once again that 'auto' is used after 'copy'.
set timescaledb.remote_data_fetcher = 'auto'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 @@ -104,7 +109,7 @@ WHERE ) AS l WHERE d.name ~~ d.name ) -ORDER BY 1,2; +ORDER BY 1, 2; device_id | name -----------+------ (0 rows) @@ -170,6 +175,26 @@ select * from disttable_with_bytea; 1001 | (2 rows) +-- Prepared statement fetcher with bytea data +set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from disttable_with_bytea; +QUERY PLAN + Custom Scan (DataNodeScan) on public.disttable_with_bytea (actual rows=2 loops=1) + Output: disttable_with_bytea."time", disttable_with_bytea.bdata + Data node: data_node_3 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", bdata FROM public.disttable_with_bytea WHERE _timescaledb_internal.chunks_in(public.disttable_with_bytea.*, ARRAY[..]) +(6 rows) + +select * from disttable_with_bytea; + time | bdata +------+------- + 1001 | \x + 1001 | +(2 rows) + -- #4515 test for assertion failure in copy_fetcher_close SET timescaledb.remote_data_fetcher = 'copy'; SELECT * @@ -190,6 +215,44 @@ WHERE EXISTS ( ------+--------+------- (0 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + -- #4518 -- test error handling for queries with multiple distributed hypertables SET timescaledb.remote_data_fetcher = 'copy'; @@ -201,8 +264,21 @@ WHERE EXISTS ( LATERAL (select * from metrics as ref_2) as subq_3 WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device -); -ERROR: COPY fetcher not supported +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM + conditions_dist1 ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed as ref_1, + LATERAL (select * from metrics as ref_2) as subq_3 + WHERE + (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query SET timescaledb.remote_data_fetcher = 'auto'; SELECT * FROM conditions_dist1 ref_0 @@ -213,7 +289,7 @@ WHERE EXISTS ( WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device ) -ORDER BY 1,2; +ORDER BY 1, 2; time | device | value ------------------------------+--------+------- Sun Jan 01 06:01:00 2017 PST | 1 | 1.2 @@ -267,7 +343,7 @@ WHERE AND m.ts BETWEEN '2021-08-17 00:00:00' AND '2021-08-17 01:00:00' ORDER BY 1 DESC LIMIT 1; ERROR: cannot use COPY fetcher because the plan is parameterized --- Test copy fetcher when query is aborted before EOF due to LIMIT +-- Test fetcher when query is aborted before EOF due to LIMIT SET timescaledb.remote_data_fetcher = 'copy'; SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; 
time | device_id | v0 | v1 | v2 | v3 @@ -285,6 +361,40 @@ SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | (11 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + -- Verify that cursor fetcher can be rewind before EOF due to an -- intermediate JOIN product reaching LIMIT SET timescaledb.remote_data_fetcher = 'cursor'; diff --git a/tsl/test/shared/expected/dist_fetcher_type-13.out b/tsl/test/shared/expected/dist_fetcher_type-13.out index c20f3624c..593cafa7a 100644 --- a/tsl/test/shared/expected/dist_fetcher_type-13.out +++ b/tsl/test/shared/expected/dist_fetcher_type-13.out @@ -66,12 +66,17 @@ QUERY PLAN Remote SQL: SELECT id FROM public.distinct_on_distributed WHERE _timescaledb_internal.chunks_in(public.distinct_on_distributed.*, ARRAY[..]) (19 rows) --- This query can't work with copy fetcher. +-- This query can't work with copy or prepared fetcher. set timescaledb.remote_data_fetcher = 'copy'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 where t1.id = t2.id + 1 limit 1; -ERROR: COPY fetcher not supported +ERROR: only cursor fetcher is supported for this query +set timescaledb.remote_data_fetcher = 'prepared'; +select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 +where t1.id = t2.id + 1 +limit 1; +ERROR: only cursor fetcher is supported for this query -- Check once again that 'auto' is used after 'copy'. 
set timescaledb.remote_data_fetcher = 'auto'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 @@ -104,7 +109,7 @@ WHERE ) AS l WHERE d.name ~~ d.name ) -ORDER BY 1,2; +ORDER BY 1, 2; device_id | name -----------+------ (0 rows) @@ -170,6 +175,26 @@ select * from disttable_with_bytea; 1001 | (2 rows) +-- Prepared statement fetcher with bytea data +set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from disttable_with_bytea; +QUERY PLAN + Custom Scan (DataNodeScan) on public.disttable_with_bytea (actual rows=2 loops=1) + Output: disttable_with_bytea."time", disttable_with_bytea.bdata + Data node: data_node_3 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", bdata FROM public.disttable_with_bytea WHERE _timescaledb_internal.chunks_in(public.disttable_with_bytea.*, ARRAY[..]) +(6 rows) + +select * from disttable_with_bytea; + time | bdata +------+------- + 1001 | \x + 1001 | +(2 rows) + -- #4515 test for assertion failure in copy_fetcher_close SET timescaledb.remote_data_fetcher = 'copy'; SELECT * @@ -190,6 +215,44 @@ WHERE EXISTS ( ------+--------+------- (0 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + -- #4518 -- test error handling for queries with multiple distributed hypertables SET timescaledb.remote_data_fetcher = 'copy'; @@ -201,8 +264,21 @@ WHERE EXISTS ( LATERAL (select * from metrics as ref_2) as subq_3 WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device -); -ERROR: COPY fetcher not supported +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM + conditions_dist1 ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed as ref_1, + LATERAL (select * from metrics as ref_2) as subq_3 + WHERE + (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query SET timescaledb.remote_data_fetcher = 'auto'; SELECT * FROM conditions_dist1 ref_0 @@ -213,7 +289,7 @@ WHERE EXISTS ( WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device ) -ORDER BY 1,2; +ORDER BY 1, 2; time | device | value ------------------------------+--------+------- Sun Jan 01 06:01:00 2017 PST | 1 | 1.2 @@ -267,7 +343,7 @@ WHERE AND m.ts BETWEEN '2021-08-17 00:00:00' AND '2021-08-17 01:00:00' ORDER BY 1 DESC LIMIT 1; ERROR: cannot use COPY fetcher because the plan is parameterized --- Test copy fetcher when query is aborted before EOF due to LIMIT +-- Test fetcher when query is aborted before EOF due to LIMIT SET timescaledb.remote_data_fetcher = 'copy'; SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; 
time | device_id | v0 | v1 | v2 | v3 @@ -285,6 +361,40 @@ SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | (11 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + -- Verify that cursor fetcher can be rewind before EOF due to an -- intermediate JOIN product reaching LIMIT SET timescaledb.remote_data_fetcher = 'cursor'; diff --git a/tsl/test/shared/expected/dist_fetcher_type-14.out b/tsl/test/shared/expected/dist_fetcher_type-14.out index c20f3624c..593cafa7a 100644 --- a/tsl/test/shared/expected/dist_fetcher_type-14.out +++ b/tsl/test/shared/expected/dist_fetcher_type-14.out @@ -66,12 +66,17 @@ QUERY PLAN Remote SQL: SELECT id FROM public.distinct_on_distributed WHERE _timescaledb_internal.chunks_in(public.distinct_on_distributed.*, ARRAY[..]) (19 rows) --- This query can't work with copy fetcher. +-- This query can't work with copy or prepared fetcher. set timescaledb.remote_data_fetcher = 'copy'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 where t1.id = t2.id + 1 limit 1; -ERROR: COPY fetcher not supported +ERROR: only cursor fetcher is supported for this query +set timescaledb.remote_data_fetcher = 'prepared'; +select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 +where t1.id = t2.id + 1 +limit 1; +ERROR: only cursor fetcher is supported for this query -- Check once again that 'auto' is used after 'copy'. 
set timescaledb.remote_data_fetcher = 'auto'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 @@ -104,7 +109,7 @@ WHERE ) AS l WHERE d.name ~~ d.name ) -ORDER BY 1,2; +ORDER BY 1, 2; device_id | name -----------+------ (0 rows) @@ -170,6 +175,26 @@ select * from disttable_with_bytea; 1001 | (2 rows) +-- Prepared statement fetcher with bytea data +set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from disttable_with_bytea; +QUERY PLAN + Custom Scan (DataNodeScan) on public.disttable_with_bytea (actual rows=2 loops=1) + Output: disttable_with_bytea."time", disttable_with_bytea.bdata + Data node: data_node_3 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", bdata FROM public.disttable_with_bytea WHERE _timescaledb_internal.chunks_in(public.disttable_with_bytea.*, ARRAY[..]) +(6 rows) + +select * from disttable_with_bytea; + time | bdata +------+------- + 1001 | \x + 1001 | +(2 rows) + -- #4515 test for assertion failure in copy_fetcher_close SET timescaledb.remote_data_fetcher = 'copy'; SELECT * @@ -190,6 +215,44 @@ WHERE EXISTS ( ------+--------+------- (0 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + -- #4518 -- test error handling for queries with multiple distributed hypertables SET timescaledb.remote_data_fetcher = 'copy'; @@ -201,8 +264,21 @@ WHERE EXISTS ( LATERAL (select * from metrics as ref_2) as subq_3 WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device -); -ERROR: COPY fetcher not supported +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM + conditions_dist1 ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed as ref_1, + LATERAL (select * from metrics as ref_2) as subq_3 + WHERE + (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query SET timescaledb.remote_data_fetcher = 'auto'; SELECT * FROM conditions_dist1 ref_0 @@ -213,7 +289,7 @@ WHERE EXISTS ( WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device ) -ORDER BY 1,2; +ORDER BY 1, 2; time | device | value ------------------------------+--------+------- Sun Jan 01 06:01:00 2017 PST | 1 | 1.2 @@ -267,7 +343,7 @@ WHERE AND m.ts BETWEEN '2021-08-17 00:00:00' AND '2021-08-17 01:00:00' ORDER BY 1 DESC LIMIT 1; ERROR: cannot use COPY fetcher because the plan is parameterized --- Test copy fetcher when query is aborted before EOF due to LIMIT +-- Test fetcher when query is aborted before EOF due to LIMIT SET timescaledb.remote_data_fetcher = 'copy'; SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; 
time | device_id | v0 | v1 | v2 | v3 @@ -285,6 +361,40 @@ SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | (11 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + -- Verify that cursor fetcher can be rewind before EOF due to an -- intermediate JOIN product reaching LIMIT SET timescaledb.remote_data_fetcher = 'cursor'; diff --git a/tsl/test/shared/expected/dist_fetcher_type-15.out b/tsl/test/shared/expected/dist_fetcher_type-15.out index b1e353ff6..7bce22016 100644 --- a/tsl/test/shared/expected/dist_fetcher_type-15.out +++ b/tsl/test/shared/expected/dist_fetcher_type-15.out @@ -67,12 +67,17 @@ QUERY PLAN Remote SQL: SELECT id FROM public.distinct_on_distributed WHERE _timescaledb_internal.chunks_in(public.distinct_on_distributed.*, ARRAY[..]) (19 rows) --- This query can't work with copy fetcher. +-- This query can't work with copy or prepared fetcher. set timescaledb.remote_data_fetcher = 'copy'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 where t1.id = t2.id + 1 limit 1; -ERROR: COPY fetcher not supported +ERROR: only cursor fetcher is supported for this query +set timescaledb.remote_data_fetcher = 'prepared'; +select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 +where t1.id = t2.id + 1 +limit 1; +ERROR: only cursor fetcher is supported for this query -- Check once again that 'auto' is used after 'copy'. 
set timescaledb.remote_data_fetcher = 'auto'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 @@ -105,7 +110,7 @@ WHERE ) AS l WHERE d.name ~~ d.name ) -ORDER BY 1,2; +ORDER BY 1, 2; device_id | name -----------+------ (0 rows) @@ -171,6 +176,26 @@ select * from disttable_with_bytea; 1001 | (2 rows) +-- Prepared statement fetcher with bytea data +set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from disttable_with_bytea; +QUERY PLAN + Custom Scan (DataNodeScan) on public.disttable_with_bytea (actual rows=2 loops=1) + Output: disttable_with_bytea."time", disttable_with_bytea.bdata + Data node: data_node_3 + Fetcher Type: Prepared statement + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", bdata FROM public.disttable_with_bytea WHERE _timescaledb_internal.chunks_in(public.disttable_with_bytea.*, ARRAY[..]) +(6 rows) + +select * from disttable_with_bytea; + time | bdata +------+------- + 1001 | \x + 1001 | +(2 rows) + -- #4515 test for assertion failure in copy_fetcher_close SET timescaledb.remote_data_fetcher = 'copy'; SELECT * @@ -191,6 +216,44 @@ WHERE EXISTS ( ------+--------+------- (0 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + time | device | value +------+--------+------- +(0 rows) + -- #4518 -- test error handling for queries with multiple distributed hypertables SET timescaledb.remote_data_fetcher = 'copy'; @@ -202,8 +265,21 @@ WHERE EXISTS ( LATERAL (select * from metrics as ref_2) as subq_3 WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device -); -ERROR: COPY fetcher not supported +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM + conditions_dist1 ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed as ref_1, + LATERAL (select * from metrics as ref_2) as subq_3 + WHERE + (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device +) +ORDER BY 1, 2; +ERROR: only cursor fetcher is supported for this query SET timescaledb.remote_data_fetcher = 'auto'; SELECT * FROM conditions_dist1 ref_0 @@ -214,7 +290,7 @@ WHERE EXISTS ( WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device ) -ORDER BY 1,2; +ORDER BY 1, 2; time | device | value ------------------------------+--------+------- Sun Jan 01 06:01:00 2017 PST | 1 | 1.2 @@ -268,7 +344,7 @@ WHERE AND m.ts BETWEEN '2021-08-17 00:00:00' AND '2021-08-17 01:00:00' ORDER BY 1 DESC LIMIT 1; ERROR: cannot use COPY fetcher because the plan is parameterized --- Test copy fetcher when query is aborted before EOF due to LIMIT +-- Test fetcher when query is aborted before EOF due to LIMIT SET timescaledb.remote_data_fetcher = 'copy'; SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; 
time | device_id | v0 | v1 | v2 | v3 @@ -286,6 +362,40 @@ SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | (11 rows) +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + +SET timescaledb.remote_data_fetcher = 'cursor'; +SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11; + time | device_id | v0 | v1 | v2 | v3 +------------------------------+-----------+----+----+-----+---- + Fri Dec 31 16:00:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:00:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:00:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:00:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:00:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:02:00 1999 PST | 1 | 2 | 3 | 1.5 | + Fri Dec 31 16:02:00 1999 PST | 2 | 3 | 4 | 2.5 | + Fri Dec 31 16:02:00 1999 PST | 3 | 4 | 5 | 3.5 | + Fri Dec 31 16:02:00 1999 PST | 4 | 5 | 6 | 4.5 | + Fri Dec 31 16:02:00 1999 PST | 5 | 6 | 7 | 5.5 | + Fri Dec 31 16:04:00 1999 PST | 1 | 2 | 3 | 1.5 | +(11 rows) + -- Verify that cursor fetcher can be rewind before EOF due to an -- intermediate JOIN product reaching LIMIT SET timescaledb.remote_data_fetcher = 'cursor'; diff --git a/tsl/test/shared/expected/dist_remote_error-12.out b/tsl/test/shared/expected/dist_remote_error-12.out deleted file mode 100644 index 4acbf5f01..000000000 --- a/tsl/test/shared/expected/dist_remote_error-12.out +++ /dev/null @@ -1,229 +0,0 @@ --- This file and its contents are licensed under the Timescale License. --- Please see the included NOTICE for copyright information and --- LICENSE-TIMESCALE for a copy of the license. --- Import setup file to data nodes. -\unset ECHO --- Disable SSL to get stable error output across versions. SSL adds some output --- that changed in PG 14. -set timescaledb.debug_enable_ssl to off; -set client_min_messages to error; -SET timescaledb.hide_data_node_name_in_errors = 'on'; --- A relatively big table on one data node -create table metrics_dist_remote_error(like metrics_dist); -select table_name from create_distributed_hypertable('metrics_dist_remote_error', 'time', 'device_id', - data_nodes => '{"data_node_1"}'); - table_name - metrics_dist_remote_error -(1 row) - -insert into metrics_dist_remote_error select * from metrics_dist order by metrics_dist limit 20000; --- The error messages vary wildly between the Postgres versions, dependent on --- the particular behavior of libqp in this or that case. The purpose of this --- test is not to solidify this accidental behavior, but to merely exercise the --- error handling code to make sure it doesn't have fatal errors. Unfortunately, --- there is no way to suppress error output from a psql script. 
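A hedged aside on the psql behavior the comment above refers to: the error text itself always reaches the output, but whether an error also aborts the rest of the script is governed by ON_ERROR_STOP, which is why the test body that follows switches it off before the deliberately failing statements. A minimal illustration, independent of the tables in this patch:

\set ON_ERROR_STOP off
select 1 / 0;  -- the ERROR is still printed, but the script keeps going
\set ON_ERROR_STOP on
select 1 / 0;  -- from this point, the same error would abort the script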
-set client_min_messages to ERROR; -\set ON_ERROR_STOP off -set timescaledb.remote_data_fetcher = 'copy'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(16384, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 16384 rows, 16384 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Output: 1 - Data node: data_node_1 - Fetcher Type: COPY - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(6 rows) - --- We don't test fatal errors here, because PG versions before 14 are unable to --- report them properly to the access node, so we get different errors in these --- versions. --- Now test the same with the cursor fetcher. 
-set timescaledb.remote_data_fetcher = 'cursor'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Output: 1 - Data node: data_node_1 - Fetcher Type: Cursor - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(6 rows) - --- Table with broken send for a data type. -create table metrics_dist_bs(like metrics_dist); -alter table metrics_dist_bs alter column v0 type bs; -select table_name from create_distributed_hypertable('metrics_dist_bs', - 'time', 'device_id'); - table_name - metrics_dist_bs -(1 row) - -set timescaledb.enable_connection_binary_data to off; -insert into metrics_dist_bs - select * from metrics_dist_remote_error; -set timescaledb.enable_connection_binary_data to on; -explain (analyze, verbose, costs off, timing off, summary off) -select * from metrics_dist_bs; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -drop table metrics_dist_bs; --- Table with broken receive for a data type. -create table metrics_dist_br(like metrics_dist); -alter table metrics_dist_br alter column v0 type br; -select table_name from create_distributed_hypertable('metrics_dist_br', - 'time', 'device_id'); - table_name - metrics_dist_br -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 1 -(1 row) - --- Test that INSERT and COPY fail on data nodes. --- Note that we use the text format for the COPY input, so that the access node --- doesn't call `recv` and fail by itself. It's going to use binary format for --- transfer to data nodes regardless of the input format. -set timescaledb.dist_copy_transfer_format = 'binary'; --- First, create the reference. 
-\copy (select * from metrics_dist_remote_error) to 'dist_remote_error.text' with (format text); --- We have to test various interleavings of COPY and INSERT to check that --- one can recover from connection failure states introduced by another. -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen --- Fail at different points -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen -reset timescaledb.debug_broken_sendrecv_throw_after; --- Same with different replication factor -truncate metrics_dist_br; -select set_replication_factor('metrics_dist_br', 2); - set_replication_factor - -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 2 -(1 row) - -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set 
timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen --- Should succeed with text format for data transfer. -set timescaledb.dist_copy_transfer_format = 'text'; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); --- Final check. -set timescaledb.enable_connection_binary_data = false; -select count(*) from metrics_dist_br; - count - 20000 -(1 row) - -set timescaledb.enable_connection_binary_data = true; -reset timescaledb.debug_broken_sendrecv_throw_after; -drop table metrics_dist_br; --- Table with sleepy receive for a data type, to improve coverage of the waiting --- code on the access node. -create table metrics_dist_bl(like metrics_dist); -alter table metrics_dist_bl alter column v0 type bl; -select table_name from create_distributed_hypertable('metrics_dist_bl', - 'time', 'device_id'); - table_name - metrics_dist_bl -(1 row) - --- We're using sleepy recv function, so need the binary transfer format for it --- to be called on the data nodes. -set timescaledb.dist_copy_transfer_format = 'binary'; --- Test INSERT and COPY with slow data node. -\copy metrics_dist_bl from 'dist_remote_error.text' with (format text); -insert into metrics_dist_bl select * from metrics_dist_remote_error; -select count(*) from metrics_dist_bl; - count - 40000 -(1 row) - -drop table metrics_dist_bl; -drop table metrics_dist_remote_error; diff --git a/tsl/test/shared/expected/dist_remote_error-13.out b/tsl/test/shared/expected/dist_remote_error-13.out deleted file mode 100644 index 4acbf5f01..000000000 --- a/tsl/test/shared/expected/dist_remote_error-13.out +++ /dev/null @@ -1,229 +0,0 @@ --- This file and its contents are licensed under the Timescale License. --- Please see the included NOTICE for copyright information and --- LICENSE-TIMESCALE for a copy of the license. --- Import setup file to data nodes. -\unset ECHO --- Disable SSL to get stable error output across versions. SSL adds some output --- that changed in PG 14. -set timescaledb.debug_enable_ssl to off; -set client_min_messages to error; -SET timescaledb.hide_data_node_name_in_errors = 'on'; --- A relatively big table on one data node -create table metrics_dist_remote_error(like metrics_dist); -select table_name from create_distributed_hypertable('metrics_dist_remote_error', 'time', 'device_id', - data_nodes => '{"data_node_1"}'); - table_name - metrics_dist_remote_error -(1 row) - -insert into metrics_dist_remote_error select * from metrics_dist order by metrics_dist limit 20000; --- The error messages vary wildly between the Postgres versions, dependent on --- the particular behavior of libqp in this or that case. The purpose of this --- test is not to solidify this accidental behavior, but to merely exercise the --- error handling code to make sure it doesn't have fatal errors. Unfortunately, --- there is no way to suppress error output from a psql script. 
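A hedged aside condensing the recovery pattern exercised above with the broken-receive table: binary COPY transfer invokes each type's send/recv functions on the data-node connection, so the br column fails at the debug point, while text-format transfer bypasses send/recv entirely. A minimal sketch reusing the objects from the test setup:

set timescaledb.dist_copy_transfer_format = 'binary';
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
-- fails: the data node hits the br type's broken recv at the debug point
set timescaledb.dist_copy_transfer_format = 'text';
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
-- succeeds: text transfer never calls the broken recv function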
-set client_min_messages to ERROR; -\set ON_ERROR_STOP off -set timescaledb.remote_data_fetcher = 'copy'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(16384, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 16384 rows, 16384 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Output: 1 - Data node: data_node_1 - Fetcher Type: COPY - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(6 rows) - --- We don't test fatal errors here, because PG versions before 14 are unable to --- report them properly to the access node, so we get different errors in these --- versions. --- Now test the same with the cursor fetcher. 
-set timescaledb.remote_data_fetcher = 'cursor'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Output: 1 - Data node: data_node_1 - Fetcher Type: Cursor - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(6 rows) - --- Table with broken send for a data type. -create table metrics_dist_bs(like metrics_dist); -alter table metrics_dist_bs alter column v0 type bs; -select table_name from create_distributed_hypertable('metrics_dist_bs', - 'time', 'device_id'); - table_name - metrics_dist_bs -(1 row) - -set timescaledb.enable_connection_binary_data to off; -insert into metrics_dist_bs - select * from metrics_dist_remote_error; -set timescaledb.enable_connection_binary_data to on; -explain (analyze, verbose, costs off, timing off, summary off) -select * from metrics_dist_bs; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -drop table metrics_dist_bs; --- Table with broken receive for a data type. -create table metrics_dist_br(like metrics_dist); -alter table metrics_dist_br alter column v0 type br; -select table_name from create_distributed_hypertable('metrics_dist_br', - 'time', 'device_id'); - table_name - metrics_dist_br -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 1 -(1 row) - --- Test that INSERT and COPY fail on data nodes. --- Note that we use the text format for the COPY input, so that the access node --- doesn't call `recv` and fail by itself. It's going to use binary format for --- transfer to data nodes regardless of the input format. -set timescaledb.dist_copy_transfer_format = 'binary'; --- First, create the reference. 
-\copy (select * from metrics_dist_remote_error) to 'dist_remote_error.text' with (format text); --- We have to test various interleavings of COPY and INSERT to check that --- one can recover from connection failure states introduced by another. -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen --- Fail at different points -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen -reset timescaledb.debug_broken_sendrecv_throw_after; --- Same with different replication factor -truncate metrics_dist_br; -select set_replication_factor('metrics_dist_br', 2); - set_replication_factor - -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 2 -(1 row) - -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set 
timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen --- Should succeed with text format for data transfer. -set timescaledb.dist_copy_transfer_format = 'text'; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); --- Final check. -set timescaledb.enable_connection_binary_data = false; -select count(*) from metrics_dist_br; - count - 20000 -(1 row) - -set timescaledb.enable_connection_binary_data = true; -reset timescaledb.debug_broken_sendrecv_throw_after; -drop table metrics_dist_br; --- Table with sleepy receive for a data type, to improve coverage of the waiting --- code on the access node. -create table metrics_dist_bl(like metrics_dist); -alter table metrics_dist_bl alter column v0 type bl; -select table_name from create_distributed_hypertable('metrics_dist_bl', - 'time', 'device_id'); - table_name - metrics_dist_bl -(1 row) - --- We're using sleepy recv function, so need the binary transfer format for it --- to be called on the data nodes. -set timescaledb.dist_copy_transfer_format = 'binary'; --- Test INSERT and COPY with slow data node. -\copy metrics_dist_bl from 'dist_remote_error.text' with (format text); -insert into metrics_dist_bl select * from metrics_dist_remote_error; -select count(*) from metrics_dist_bl; - count - 40000 -(1 row) - -drop table metrics_dist_bl; -drop table metrics_dist_remote_error; diff --git a/tsl/test/shared/expected/dist_remote_error-14.out b/tsl/test/shared/expected/dist_remote_error-14.out deleted file mode 100644 index 4acbf5f01..000000000 --- a/tsl/test/shared/expected/dist_remote_error-14.out +++ /dev/null @@ -1,229 +0,0 @@ --- This file and its contents are licensed under the Timescale License. --- Please see the included NOTICE for copyright information and --- LICENSE-TIMESCALE for a copy of the license. --- Import setup file to data nodes. -\unset ECHO --- Disable SSL to get stable error output across versions. SSL adds some output --- that changed in PG 14. -set timescaledb.debug_enable_ssl to off; -set client_min_messages to error; -SET timescaledb.hide_data_node_name_in_errors = 'on'; --- A relatively big table on one data node -create table metrics_dist_remote_error(like metrics_dist); -select table_name from create_distributed_hypertable('metrics_dist_remote_error', 'time', 'device_id', - data_nodes => '{"data_node_1"}'); - table_name - metrics_dist_remote_error -(1 row) - -insert into metrics_dist_remote_error select * from metrics_dist order by metrics_dist limit 20000; --- The error messages vary wildly between the Postgres versions, dependent on --- the particular behavior of libqp in this or that case. The purpose of this --- test is not to solidify this accidental behavior, but to merely exercise the --- error handling code to make sure it doesn't have fatal errors. Unfortunately, --- there is no way to suppress error output from a psql script. 
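Another hedged note on the fail points repeated in each of these per-version files: the values 1, 2, 1023, 1024, 1025 bracket a power-of-two boundary, presumably to probe row-batching edges in the COPY path. The repeated pattern reduces to setting the debug GUC and retrying the same COPY:

set timescaledb.debug_broken_sendrecv_throw_after = 1024;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
-- expected: debug point error after exactly 1024 rows
reset timescaledb.debug_broken_sendrecv_throw_after;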
-set client_min_messages to ERROR; -\set ON_ERROR_STOP off -set timescaledb.remote_data_fetcher = 'copy'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(16384, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 16384 rows, 16384 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Output: 1 - Data node: data_node_1 - Fetcher Type: COPY - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(6 rows) - --- We don't test fatal errors here, because PG versions before 14 are unable to --- report them properly to the access node, so we get different errors in these --- versions. --- Now test the same with the cursor fetcher. 
-set timescaledb.remote_data_fetcher = 'cursor'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Output: 1 - Data node: data_node_1 - Fetcher Type: Cursor - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(6 rows) - --- Table with broken send for a data type. -create table metrics_dist_bs(like metrics_dist); -alter table metrics_dist_bs alter column v0 type bs; -select table_name from create_distributed_hypertable('metrics_dist_bs', - 'time', 'device_id'); - table_name - metrics_dist_bs -(1 row) - -set timescaledb.enable_connection_binary_data to off; -insert into metrics_dist_bs - select * from metrics_dist_remote_error; -set timescaledb.enable_connection_binary_data to on; -explain (analyze, verbose, costs off, timing off, summary off) -select * from metrics_dist_bs; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -drop table metrics_dist_bs; --- Table with broken receive for a data type. -create table metrics_dist_br(like metrics_dist); -alter table metrics_dist_br alter column v0 type br; -select table_name from create_distributed_hypertable('metrics_dist_br', - 'time', 'device_id'); - table_name - metrics_dist_br -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 1 -(1 row) - --- Test that INSERT and COPY fail on data nodes. --- Note that we use the text format for the COPY input, so that the access node --- doesn't call `recv` and fail by itself. It's going to use binary format for --- transfer to data nodes regardless of the input format. -set timescaledb.dist_copy_transfer_format = 'binary'; --- First, create the reference. 
-\copy (select * from metrics_dist_remote_error) to 'dist_remote_error.text' with (format text); --- We have to test various interleavings of COPY and INSERT to check that --- one can recover from connection failure states introduced by another. -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen --- Fail at different points -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen -reset timescaledb.debug_broken_sendrecv_throw_after; --- Same with different replication factor -truncate metrics_dist_br; -select set_replication_factor('metrics_dist_br', 2); - set_replication_factor - -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 2 -(1 row) - -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set 
timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen --- Should succeed with text format for data transfer. -set timescaledb.dist_copy_transfer_format = 'text'; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); --- Final check. -set timescaledb.enable_connection_binary_data = false; -select count(*) from metrics_dist_br; - count - 20000 -(1 row) - -set timescaledb.enable_connection_binary_data = true; -reset timescaledb.debug_broken_sendrecv_throw_after; -drop table metrics_dist_br; --- Table with sleepy receive for a data type, to improve coverage of the waiting --- code on the access node. -create table metrics_dist_bl(like metrics_dist); -alter table metrics_dist_bl alter column v0 type bl; -select table_name from create_distributed_hypertable('metrics_dist_bl', - 'time', 'device_id'); - table_name - metrics_dist_bl -(1 row) - --- We're using sleepy recv function, so need the binary transfer format for it --- to be called on the data nodes. -set timescaledb.dist_copy_transfer_format = 'binary'; --- Test INSERT and COPY with slow data node. -\copy metrics_dist_bl from 'dist_remote_error.text' with (format text); -insert into metrics_dist_bl select * from metrics_dist_remote_error; -select count(*) from metrics_dist_bl; - count - 40000 -(1 row) - -drop table metrics_dist_bl; -drop table metrics_dist_remote_error; diff --git a/tsl/test/shared/expected/dist_remote_error-15.out b/tsl/test/shared/expected/dist_remote_error-15.out deleted file mode 100644 index 007beacda..000000000 --- a/tsl/test/shared/expected/dist_remote_error-15.out +++ /dev/null @@ -1,231 +0,0 @@ --- This file and its contents are licensed under the Timescale License. --- Please see the included NOTICE for copyright information and --- LICENSE-TIMESCALE for a copy of the license. --- Import setup file to data nodes. -\unset ECHO --- Disable SSL to get stable error output across versions. SSL adds some output --- that changed in PG 14. -set timescaledb.debug_enable_ssl to off; -set client_min_messages to error; -SET timescaledb.hide_data_node_name_in_errors = 'on'; --- A relatively big table on one data node -create table metrics_dist_remote_error(like metrics_dist); -select table_name from create_distributed_hypertable('metrics_dist_remote_error', 'time', 'device_id', - data_nodes => '{"data_node_1"}'); - table_name - metrics_dist_remote_error -(1 row) - -insert into metrics_dist_remote_error select * from metrics_dist order by metrics_dist limit 20000; --- The error messages vary wildly between the Postgres versions, dependent on --- the particular behavior of libqp in this or that case. The purpose of this --- test is not to solidify this accidental behavior, but to merely exercise the --- error handling code to make sure it doesn't have fatal errors. Unfortunately, --- there is no way to suppress error output from a psql script. 
-set client_min_messages to ERROR; -\set ON_ERROR_STOP off -set timescaledb.remote_data_fetcher = 'copy'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(16384, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 16384 rows, 16384 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Result (actual rows=20000 loops=1) - Output: 1 - -> Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Data node: data_node_1 - Fetcher Type: COPY - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(7 rows) - --- We don't test fatal errors here, because PG versions before 14 are unable to --- report them properly to the access node, so we get different errors in these --- versions. --- Now test the same with the cursor fetcher. 
-set timescaledb.remote_data_fetcher = 'cursor'; -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 0 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 701 rows, 701 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; -ERROR: []: debug point: requested to error out after 10000 rows, 10000 rows seen -explain (analyze, verbose, costs off, timing off, summary off) -select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; -QUERY PLAN - Result (actual rows=20000 loops=1) - Output: 1 - -> Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1) - Data node: data_node_1 - Fetcher Type: Cursor - Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk - Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0)) -(7 rows) - --- Table with broken send for a data type. -create table metrics_dist_bs(like metrics_dist); -alter table metrics_dist_bs alter column v0 type bs; -select table_name from create_distributed_hypertable('metrics_dist_bs', - 'time', 'device_id'); - table_name - metrics_dist_bs -(1 row) - -set timescaledb.enable_connection_binary_data to off; -insert into metrics_dist_bs - select * from metrics_dist_remote_error; -set timescaledb.enable_connection_binary_data to on; -explain (analyze, verbose, costs off, timing off, summary off) -select * from metrics_dist_bs; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -drop table metrics_dist_bs; --- Table with broken receive for a data type. -create table metrics_dist_br(like metrics_dist); -alter table metrics_dist_br alter column v0 type br; -select table_name from create_distributed_hypertable('metrics_dist_br', - 'time', 'device_id'); - table_name - metrics_dist_br -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 1 -(1 row) - --- Test that INSERT and COPY fail on data nodes. --- Note that we use the text format for the COPY input, so that the access node --- doesn't call `recv` and fail by itself. It's going to use binary format for --- transfer to data nodes regardless of the input format. -set timescaledb.dist_copy_transfer_format = 'binary'; --- First, create the reference. 
-\copy (select * from metrics_dist_remote_error) to 'dist_remote_error.text' with (format text); --- We have to test various interleavings of COPY and INSERT to check that --- one can recover from connection failure states introduced by another. -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen --- Fail at different points -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen -reset timescaledb.debug_broken_sendrecv_throw_after; --- Same with different replication factor -truncate metrics_dist_br; -select set_replication_factor('metrics_dist_br', 2); - set_replication_factor - -(1 row) - -select hypertable_name, replication_factor from timescaledb_information.hypertables -where hypertable_name = 'metrics_dist_br'; - hypertable_name | replication_factor ------------------+-------------------- - metrics_dist_br | 2 -(1 row) - -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -insert into metrics_dist_br select * from metrics_dist_remote_error; -ERROR: []: debug point: requested to error out after 7103 rows, 7103 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1 rows, 1 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 2; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 2 rows, 2 rows seen -set 
timescaledb.debug_broken_sendrecv_throw_after = 1023; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1023 rows, 1023 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1024; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1024 rows, 1024 rows seen -set timescaledb.debug_broken_sendrecv_throw_after = 1025; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); -ERROR: []: debug point: requested to error out after 1025 rows, 1025 rows seen --- Should succeed with text format for data transfer. -set timescaledb.dist_copy_transfer_format = 'text'; -\copy metrics_dist_br from 'dist_remote_error.text' with (format text); --- Final check. -set timescaledb.enable_connection_binary_data = false; -select count(*) from metrics_dist_br; - count - 20000 -(1 row) - -set timescaledb.enable_connection_binary_data = true; -reset timescaledb.debug_broken_sendrecv_throw_after; -drop table metrics_dist_br; --- Table with sleepy receive for a data type, to improve coverage of the waiting --- code on the access node. -create table metrics_dist_bl(like metrics_dist); -alter table metrics_dist_bl alter column v0 type bl; -select table_name from create_distributed_hypertable('metrics_dist_bl', - 'time', 'device_id'); - table_name - metrics_dist_bl -(1 row) - --- We're using sleepy recv function, so need the binary transfer format for it --- to be called on the data nodes. -set timescaledb.dist_copy_transfer_format = 'binary'; --- Test INSERT and COPY with slow data node. -\copy metrics_dist_bl from 'dist_remote_error.text' with (format text); -insert into metrics_dist_bl select * from metrics_dist_remote_error; -select count(*) from metrics_dist_bl; - count - 40000 -(1 row) - -drop table metrics_dist_bl; -drop table metrics_dist_remote_error; diff --git a/tsl/test/shared/sql/CMakeLists.txt b/tsl/test/shared/sql/CMakeLists.txt index a26c6eea0..ead5a1629 100644 --- a/tsl/test/shared/sql/CMakeLists.txt +++ b/tsl/test/shared/sql/CMakeLists.txt @@ -29,8 +29,7 @@ endif() if(CMAKE_BUILD_TYPE MATCHES Debug) list(APPEND TEST_FILES_SHARED dist_parallel_agg.sql dist_queries.sql timestamp_limits.sql with_clause_parser.sql) - list(APPEND TEST_TEMPLATES_SHARED constify_now.sql.in - dist_remote_error.sql.in) + list(APPEND TEST_TEMPLATES_SHARED constify_now.sql.in) endif(CMAKE_BUILD_TYPE MATCHES Debug) # Regression tests that vary with PostgreSQL version. Generated test files are diff --git a/tsl/test/shared/sql/dist_fetcher_type.sql.in b/tsl/test/shared/sql/dist_fetcher_type.sql.in index 932b37c51..35fc22a3d 100644 --- a/tsl/test/shared/sql/dist_fetcher_type.sql.in +++ b/tsl/test/shared/sql/dist_fetcher_type.sql.in @@ -28,12 +28,17 @@ select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 where t1.id = t2.id limit 1; --- This query can't work with copy fetcher. +-- This query can't work with copy or prepared fetcher. set timescaledb.remote_data_fetcher = 'copy'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 where t1.id = t2.id + 1 limit 1; +set timescaledb.remote_data_fetcher = 'prepared'; +select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 +where t1.id = t2.id + 1 +limit 1; + -- Check once again that 'auto' is used after 'copy'. 
set timescaledb.remote_data_fetcher = 'auto'; select 1 x from distinct_on_distributed t1, distinct_on_distributed t2 @@ -64,7 +69,7 @@ WHERE ) AS l WHERE d.name ~~ d.name ) -ORDER BY 1,2; +ORDER BY 1, 2; RESET jit; @@ -96,8 +101,16 @@ explain (analyze, verbose, costs off, timing off, summary off) select * from disttable_with_bytea; select * from disttable_with_bytea; +-- Prepared statement fetcher with bytea data +set timescaledb.remote_data_fetcher = 'prepared'; + +explain (analyze, verbose, costs off, timing off, summary off) +select * from disttable_with_bytea; +select * from disttable_with_bytea; + -- #4515 test for assertion failure in copy_fetcher_close SET timescaledb.remote_data_fetcher = 'copy'; + SELECT * FROM conditions ref_0 @@ -113,6 +126,41 @@ WHERE EXISTS ( WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device ); +SET timescaledb.remote_data_fetcher = 'prepared'; + +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + +SET timescaledb.remote_data_fetcher = 'cursor'; + +SELECT * +FROM + conditions ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed, + LATERAL ( + SELECT * + FROM pg_class, + LATERAL ( + SELECT ref_0.device FROM pg_class WHERE false LIMIT 1) as lat_1 + ) as lat_2 + WHERE (SELECT 1 FROM pg_class LIMIT 1) >= ref_0.device +); + + -- #4518 -- test error handling for queries with multiple distributed hypertables SET timescaledb.remote_data_fetcher = 'copy'; @@ -124,7 +172,21 @@ WHERE EXISTS ( LATERAL (select * from metrics as ref_2) as subq_3 WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device -); +) +ORDER BY 1, 2; + +SET timescaledb.remote_data_fetcher = 'prepared'; +SELECT * FROM + conditions_dist1 ref_0 +WHERE EXISTS ( + SELECT FROM + distinct_on_distributed as ref_1, + LATERAL (select * from metrics as ref_2) as subq_3 + WHERE + (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device +) +ORDER BY 1, 2; + SET timescaledb.remote_data_fetcher = 'auto'; SELECT * FROM conditions_dist1 ref_0 @@ -135,7 +197,7 @@ WHERE EXISTS ( WHERE (SELECT device_id FROM metrics_compressed limit 1 offset 3) >= ref_0.device ) -ORDER BY 1,2; +ORDER BY 1, 2; -- Check that we don't use COPY fetcher for parameterized plans. 
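-- (Rationale, as I understand the fetchers: a parameterized data node scan is
-- rescanned with a new parameter value on each outer-loop iteration. A COPY
-- stream cannot be re-issued with new parameters mid-stream, while a cursor or
-- prepared statement can simply be re-executed with the new values, so those
-- fetchers are used for such plans instead.)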
 CREATE TABLE lookup (id SERIAL NOT NULL, key TEXT, val TEXT);
@@ -174,10 +236,18 @@ WHERE
     AND m.ts BETWEEN '2021-08-17 00:00:00' AND '2021-08-17 01:00:00'
 ORDER BY 1 DESC LIMIT 1;
 
--- Test copy fetcher when query is aborted before EOF due to LIMIT
+
+-- Test fetcher when query is aborted before EOF due to LIMIT
 SET timescaledb.remote_data_fetcher = 'copy';
 SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11;
 
+SET timescaledb.remote_data_fetcher = 'prepared';
+SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11;
+
+SET timescaledb.remote_data_fetcher = 'cursor';
+SELECT * FROM metrics_dist ORDER BY time, device_id LIMIT 11;
+
+
 -- Verify that cursor fetcher can be rewound before EOF due to an
 -- intermediate JOIN product reaching LIMIT
 SET timescaledb.remote_data_fetcher = 'cursor';
diff --git a/tsl/test/shared/sql/include/dist_remote_error_setup.sql b/tsl/test/shared/sql/include/dist_remote_error_setup.sql
deleted file mode 100644
index f7a8bff8a..000000000
--- a/tsl/test/shared/sql/include/dist_remote_error_setup.sql
+++ /dev/null
@@ -1,79 +0,0 @@
--- This file and its contents are licensed under the Timescale License.
--- Please see the included NOTICE for copyright information and
--- LICENSE-TIMESCALE for a copy of the license.
-
--- bs is for "broken send", the type is int4
-create type bs;
-
-create or replace function bssend(bs) returns bytea
-  as :MODULE_PATHNAME, 'ts_debug_broken_int4send'
-  language c immutable strict parallel safe;
-
-create or replace function bsrecv(internal) returns bs as 'int4recv' language internal;
-
-create or replace function bsin(cstring) returns bs as 'int4in' language internal;
-
-create or replace function bsout(bs) returns cstring as 'int4out' language internal;
-
-create type bs(input = bsin, output = bsout, send = bssend, receive = bsrecv,
-  internallength = 4, passedbyvalue = true);
-
-create cast (int4 as bs) without function as implicit;
-
-create cast (bs as int4) without function as implicit;
-
--- same but for broken recv
-create type br;
-
-create or replace function brsend(br) returns bytea as 'int4send' language internal;
-
-create or replace function brrecv(internal) returns br
-  as :MODULE_PATHNAME, 'ts_debug_broken_int4recv'
-  language c immutable strict parallel safe;
-
-create or replace function brin(cstring) returns br as 'int4in' language internal;
-
-create or replace function brout(br) returns cstring as 'int4out' language internal;
-
-create type br(input = brin, output = brout, send = brsend, receive = brrecv,
-  internallength = 4, passedbyvalue = true);
-
-create cast (int4 as br) without function as implicit;
-
-create cast (br as int4) without function as implicit;
-
--- recv that sleeps, optionally (want that only on one data node)
-create type bl;
-
-create or replace function blsend(bl) returns bytea as 'int4send' language internal;
-
-\if :{?sleepy_recv}
-create or replace function blrecv(internal) returns bl
-  as :MODULE_PATHNAME, 'ts_debug_sleepy_int4recv'
-  language c immutable strict parallel safe;
-\else
-create or replace function blrecv(internal) returns bl as 'int4recv' language internal;
-\endif
-
-create or replace function blin(cstring) returns bl as 'int4in' language internal;
-
-create or replace function blout(bl) returns cstring as 'int4out' language internal;
-
-create type bl(input = blin, output = blout, send = blsend, receive = blrecv,
-  internallength = 4, passedbyvalue = true);
-
-create cast (int4 as bl) without function as implicit;
-
-create cast (bl as int4) without function as
implicit;
-
--- Create a function that raises an error every nth row.
--- It's stable, takes a second argument and returns current number of rows,
--- so that it is shipped to data nodes and not optimized out.
--- It's written in one line because I don't know how to make \set accept
--- several lines.
-CREATE OR REPLACE FUNCTION ts_debug_shippable_error_after_n_rows(integer, anyelement)
-  RETURNS integer AS :MODULE_PATHNAME LANGUAGE C STABLE STRICT;
-
--- Same as above, but fatal.
-CREATE OR REPLACE FUNCTION ts_debug_shippable_fatal_after_n_rows(integer, anyelement)
-  RETURNS integer AS :MODULE_PATHNAME LANGUAGE C STABLE STRICT;
diff --git a/tsl/test/sql/.gitignore b/tsl/test/sql/.gitignore
index cbc60d698..01a18a175 100644
--- a/tsl/test/sql/.gitignore
+++ b/tsl/test/sql/.gitignore
@@ -12,6 +12,7 @@
 /dist_hypertable-*.sql
 /dist_partial_agg-*.sql
 /dist_ref_table_join-*.sql
+/dist_remote_error-*.sql
 /hypertable_distributed-*.sql
 /jit-*.sql
 /modify_exclusion-*.sql
diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt
index 5894a042e..f123b060a 100644
--- a/tsl/test/sql/CMakeLists.txt
+++ b/tsl/test/sql/CMakeLists.txt
@@ -159,6 +159,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
     remote_copy.sql.in
     dist_grant.sql.in
     dist_ref_table_join.sql.in
+    dist_remote_error.sql.in
     dist_partial_agg.sql.in
     dist_query.sql.in
     cagg_invalidation_dist_ht.sql.in
diff --git a/tsl/test/sql/data_fetcher.sql b/tsl/test/sql/data_fetcher.sql
index dabae4a5d..b4e11250b 100644
--- a/tsl/test/sql/data_fetcher.sql
+++ b/tsl/test/sql/data_fetcher.sql
@@ -11,9 +11,8 @@
 \set TEST_BASE_NAME data_fetcher
 SELECT format('include/%s_run.sql', :'TEST_BASE_NAME') as "TEST_QUERY_NAME",
        format('%s/results/%s_results_cursor.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_CURSOR",
-       format('%s/results/%s_results_copy.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_COPY"
-\gset
-SELECT format('\! diff %s %s', :'TEST_RESULTS_CURSOR', :'TEST_RESULTS_COPY') as "DIFF_CMD"
+       format('%s/results/%s_results_copy.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_COPY",
+       format('%s/results/%s_results_prepared.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') as "TEST_RESULTS_PREPARED"
 \gset
 
 SET ROLE :ROLE_CLUSTER_SUPERUSER;
@@ -63,6 +62,18 @@ SET timescaledb.remote_data_fetcher = 'cursor';
 \ir :TEST_QUERY_NAME
 \o
 -- compare results
+SELECT format('\! diff %s %s', :'TEST_RESULTS_CURSOR', :'TEST_RESULTS_COPY') as "DIFF_CMD"
+\gset
+:DIFF_CMD
+
+-- run queries using prepared statement fetcher
+SET timescaledb.remote_data_fetcher = 'prepared';
+\o :TEST_RESULTS_PREPARED
+\ir :TEST_QUERY_NAME
+\o
+-- compare results
+SELECT format('\! diff %s %s', :'TEST_RESULTS_CURSOR', :'TEST_RESULTS_PREPARED') as "DIFF_CMD"
+\gset
 :DIFF_CMD
 
 -- Test custom FDW settings. Instead of the tests above, we are not interested
diff --git a/tsl/test/sql/dist_param.sql b/tsl/test/sql/dist_param.sql
index 2fef38107..9240e8061 100644
--- a/tsl/test/sql/dist_param.sql
+++ b/tsl/test/sql/dist_param.sql
@@ -16,6 +16,7 @@ grant usage on foreign server data_node_1 to public;
 grant create on schema public to :ROLE_1;
 set role :ROLE_1;
 reset client_min_messages;
+\set ON_ERROR_STOP 0
 -- helper function: float -> pseudorandom float [0..1].
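-- (A sketch of the arithmetic: hashfloat4() returns an int4, so dividing by
-- 2^31 - 1 lands in roughly [-1, 1], and (h + 1) / 2 then rescales that
-- into [0, 1].)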
 create or replace function mix(x float4) returns float4 as $$ select ((hashfloat4(x) / (pow(2., 31) - 1) + 1) / 2)::float4 $$ language sql;
 
@@ -94,6 +95,87 @@ order by id
 
 reset timescaledb.enable_parameterized_data_node_scan;
 
+-- All fetcher types with join
+set timescaledb.remote_data_fetcher = 'copy';
+
+select id, max(value), count(*)
+from metric_dist
+where id in (select id from metric_name where name like 'cpu%')
+    and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03'
+group by id
+order by id
+;
+
+set timescaledb.remote_data_fetcher = 'cursor';
+
+select id, max(value), count(*)
+from metric_dist
+where id in (select id from metric_name where name like 'cpu%')
+    and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03'
+group by id
+order by id
+;
+
+set timescaledb.remote_data_fetcher = 'prepared';
+
+select id, max(value), count(*)
+from metric_dist
+where id in (select id from metric_name where name like 'cpu%')
+    and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03'
+group by id
+order by id
+;
+
+-- All fetcher types with initplan
+set timescaledb.remote_data_fetcher = 'copy';
+
+select id, max(value), count(*)
+from metric_dist
+where id = any((select array_agg(id) from metric_name where name like 'cpu%')::int[])
+    and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03'
+group by id
+order by id
+;
+
+
+set timescaledb.remote_data_fetcher = 'cursor';
+
+select id, max(value), count(*)
+from metric_dist
+where id = any((select array_agg(id) from metric_name where name like 'cpu%')::int[])
+    and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03'
+group by id
+order by id
+;
+
+
+set timescaledb.remote_data_fetcher = 'prepared';
+
+select id, max(value), count(*)
+from metric_dist
+where id = any((select array_agg(id) from metric_name where name like 'cpu%')::int[])
+    and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03'
+group by id
+order by id
+;
+
+
+-- Should prefer prepared statement data fetcher for these queries.
+set timescaledb.remote_data_fetcher = 'auto';
+
+explain (analyze, verbose, costs off, timing off, summary off)
+select id, max(value), count(*)
+from metric_dist
+where id in (select id from metric_name where name like 'cpu%')
+    and ts between '2022-02-02 02:02:02+03' and '2022-03-03 02:02:02+03'
+group by id
+order by id
+;
+
+-- Should reset the plan cache mode after using the prepared statement fetcher.
+call distributed_exec('create or replace procedure assert_auto_plan_cache_mode() as $$ begin assert (select setting from pg_settings where name = ''plan_cache_mode'') = ''auto''; end; $$ language plpgsql;');
+call distributed_exec('call assert_auto_plan_cache_mode();');
+
 -- Shippable EC join
 select name, max(value), count(*)
 from metric_dist join metric_name using (id)
@@ -269,6 +351,7 @@ where name like 'cpu%' and texteq(location, 'Yerevan')
 group by id
 ;
 
+
 -- Multiple joins on different variables. Use a table instead of a CTE for saner
 -- stats.
 create table max_value_times as
diff --git a/tsl/test/shared/sql/dist_remote_error.sql.in b/tsl/test/sql/dist_remote_error.sql.in
similarity index 53%
rename from tsl/test/shared/sql/dist_remote_error.sql.in
rename to tsl/test/sql/dist_remote_error.sql.in
index 5cf11c23f..c2c35f942 100644
--- a/tsl/test/shared/sql/dist_remote_error.sql.in
+++ b/tsl/test/sql/dist_remote_error.sql.in
@@ -2,21 +2,39 @@
 -- Please see the included NOTICE for copyright information and
 -- LICENSE-TIMESCALE for a copy of the license.
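-- (The test provisions its own data nodes below rather than relying on the
-- shared setup. add_data_node() returns a composite value, so the subquery
-- expands it with (add_data_node(...)).* to report all three nodes from a
-- single SELECT.)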
+-- Set up the data nodes.
+\set DATA_NODE_1 :TEST_DBNAME _1
+\set DATA_NODE_2 :TEST_DBNAME _2
+\set DATA_NODE_3 :TEST_DBNAME _3
+
+\c :TEST_DBNAME :ROLE_SUPERUSER
+SELECT node_name, database, node_created, database_created, extension_created
+FROM (
+  SELECT (add_data_node(name, host => 'localhost', DATABASE => name)).*
+  FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v(name)
+) a;
+GRANT USAGE ON FOREIGN SERVER :DATA_NODE_1, :DATA_NODE_2, :DATA_NODE_3 TO PUBLIC;
+GRANT CREATE ON SCHEMA public TO :ROLE_1;
+
 -- Import setup file to data nodes.
 \unset ECHO
-\c data_node_1 :ROLE_SUPERUSER
+\c :DATA_NODE_1 :ROLE_SUPERUSER
 set client_min_messages to error;
 \ir include/dist_remote_error_setup.sql
-\c data_node_2 :ROLE_SUPERUSER
+\c :DATA_NODE_2 :ROLE_SUPERUSER
 set client_min_messages to error;
 \ir include/dist_remote_error_setup.sql
-\set sleepy_recv 1
-\c data_node_3 :ROLE_SUPERUSER
+\set sleepy_sendrecv 1
+\c :DATA_NODE_3 :ROLE_SUPERUSER
 set client_min_messages to error;
 \ir include/dist_remote_error_setup.sql
-\unset sleepy_recv
+\unset sleepy_sendrecv
 
 \c :TEST_DBNAME :ROLE_SUPERUSER
 set client_min_messages to error;
@@ -32,10 +50,17 @@ set client_min_messages to error;
 SET timescaledb.hide_data_node_name_in_errors = 'on';
 
 -- A relatively big table on one data node
-create table metrics_dist_remote_error(like metrics_dist);
-select table_name from create_distributed_hypertable('metrics_dist_remote_error', 'time', 'device_id',
-    data_nodes => '{"data_node_1"}');
-insert into metrics_dist_remote_error select * from metrics_dist order by metrics_dist limit 20000;
+CREATE TABLE metrics_dist_remote_error(filler_1 int, filler_2 int, filler_3 int, time timestamptz NOT NULL, device_id int, v0 int, v1 int, v2 float, v3 float);
+SELECT create_distributed_hypertable('metrics_dist_remote_error','time','device_id',3,
+    data_nodes => ARRAY[:'DATA_NODE_1']);
+
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_1;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-05 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_2;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-06 0:00:00+0'::timestamptz,'2000-01-12 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ALTER TABLE metrics_dist_remote_error DROP COLUMN filler_3;
+INSERT INTO metrics_dist_remote_error(time,device_id,v0,v1,v2,v3) SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL FROM generate_series('2000-01-13 0:00:00+0'::timestamptz,'2000-01-19 23:55:00+0','6m') gtime(time), generate_series(1,5,1) gdevice(device_id);
+ANALYZE metrics_dist_remote_error;
 
 -- The error messages vary wildly between the Postgres versions, depending on
 -- the particular behavior of libpq in this or that case.
The purpose of this @@ -94,9 +119,33 @@ select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_r explain (analyze, verbose, costs off, timing off, summary off) select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; +-- Now test the same with the prepared statement fetcher. +set timescaledb.remote_data_fetcher = 'prepared'; + +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0; + +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0; + +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0; + +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0; + +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0; + +explain (analyze, verbose, costs off, timing off, summary off) +select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0; + +reset timescaledb.remote_data_fetcher; + + -- Table with broken send for a data type. -create table metrics_dist_bs(like metrics_dist); +create table metrics_dist_bs(like metrics_dist_remote_error); alter table metrics_dist_bs alter column v0 type bs; @@ -114,7 +163,7 @@ select * from metrics_dist_bs; drop table metrics_dist_bs; -- Table with broken receive for a data type. 
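-- (br's recv function only runs when data arrives in binary format. A
-- hypothetical standalone repro, not part of this test, would be:
--   \copy (select 1::br) to 'br.bin' with (format binary);
--   \copy some_br_table from 'br.bin' with (format binary); -- calls brrecv()
-- The text-format COPY input used below keeps the access node itself from
-- failing; the binary access node -> data node transfer is what breaks.)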
-create table metrics_dist_br(like metrics_dist); +create table metrics_dist_br(like metrics_dist_remote_error); alter table metrics_dist_br alter column v0 type br; @@ -142,17 +191,17 @@ insert into metrics_dist_br select * from metrics_dist_remote_error; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -- Fail at different points -set timescaledb.debug_broken_sendrecv_throw_after = 1; +set timescaledb.debug_broken_sendrecv_error_after = 1; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 2; +set timescaledb.debug_broken_sendrecv_error_after = 2; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 1023; +set timescaledb.debug_broken_sendrecv_error_after = 1023; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 1024; +set timescaledb.debug_broken_sendrecv_error_after = 1024; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 1025; +set timescaledb.debug_broken_sendrecv_error_after = 1025; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -reset timescaledb.debug_broken_sendrecv_throw_after; +reset timescaledb.debug_broken_sendrecv_error_after; -- Same with different replication factor @@ -165,15 +214,15 @@ where hypertable_name = 'metrics_dist_br'; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); insert into metrics_dist_br select * from metrics_dist_remote_error; insert into metrics_dist_br select * from metrics_dist_remote_error; -set timescaledb.debug_broken_sendrecv_throw_after = 1; +set timescaledb.debug_broken_sendrecv_error_after = 1; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 2; +set timescaledb.debug_broken_sendrecv_error_after = 2; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 1023; +set timescaledb.debug_broken_sendrecv_error_after = 1023; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 1024; +set timescaledb.debug_broken_sendrecv_error_after = 1024; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -set timescaledb.debug_broken_sendrecv_throw_after = 1025; +set timescaledb.debug_broken_sendrecv_error_after = 1025; \copy metrics_dist_br from 'dist_remote_error.text' with (format text); -- Should succeed with text format for data transfer. @@ -185,16 +234,16 @@ set timescaledb.enable_connection_binary_data = false; select count(*) from metrics_dist_br; set timescaledb.enable_connection_binary_data = true; -reset timescaledb.debug_broken_sendrecv_throw_after; +reset timescaledb.debug_broken_sendrecv_error_after; drop table metrics_dist_br; -- Table with sleepy receive for a data type, to improve coverage of the waiting -- code on the access node. 
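-- (The sr type's recv sleeps only on data node 3, where the setup file was
-- included with sleepy_sendrecv set, so the statements below leave the access
-- node waiting on one slow connection while the other nodes are ready.)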
-create table metrics_dist_bl(like metrics_dist);
+create table metrics_dist_sr(like metrics_dist_remote_error);
 
-alter table metrics_dist_bl alter column v0 type bl;
+alter table metrics_dist_sr alter column v0 type sr;
 
-select table_name from create_distributed_hypertable('metrics_dist_bl',
+select table_name from create_distributed_hypertable('metrics_dist_sr',
     'time', 'device_id');
 
 -- We're using sleepy recv function, so need the binary transfer format for it
@@ -202,12 +251,74 @@ select table_name from create_distributed_hypertable('metrics_dist_bl',
 set timescaledb.dist_copy_transfer_format = 'binary';
 
 -- Test INSERT and COPY with slow data node.
-\copy metrics_dist_bl from 'dist_remote_error.text' with (format text);
+\copy metrics_dist_sr from 'dist_remote_error.text' with (format text);
 
-insert into metrics_dist_bl select * from metrics_dist_remote_error;
+insert into metrics_dist_sr select * from metrics_dist_remote_error;
 
-select count(*) from metrics_dist_bl;
+select count(*) from metrics_dist_sr;
 
-drop table metrics_dist_bl;
+drop table metrics_dist_sr;
 
-drop table metrics_dist_remote_error;
+-- Table with sleepy send for a data type, on one data node, to improve coverage
+-- of waiting in data fetchers.
+create table metrics_dist_ss(like metrics_dist_remote_error);
+
+alter table metrics_dist_ss alter column v0 type ss;
+
+select table_name from create_distributed_hypertable('metrics_dist_ss',
+    'time', 'device_id');
+
+-- Populate the table, using text COPY to avoid the sleepy stuff.
+set timescaledb.dist_copy_transfer_format = 'text';
+\copy metrics_dist_ss from 'dist_remote_error.text' with (format text);
+
+-- We're using sleepy send function, so need the binary transfer format for it
+-- to be called on the data nodes.
+set timescaledb.enable_connection_binary_data = true;
+
+set timescaledb.remote_data_fetcher = 'prepared';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_ss;
+
+set timescaledb.remote_data_fetcher = 'copy';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_ss;
+
+set timescaledb.remote_data_fetcher = 'cursor';
+explain (analyze, verbose, costs off, timing off, summary off)
+select * from metrics_dist_ss;
+
+-- Incorrect int output, to cover the error handling in tuplefactory.
+create table metrics_dist_io(like metrics_dist_remote_error);
+
+alter table metrics_dist_io alter column v0 type io;
+
+select table_name from create_distributed_hypertable('metrics_dist_io',
+    'time', 'device_id');
+
+-- Populate the table, using binary COPY to avoid the broken int4out.
+set timescaledb.enable_connection_binary_data = true;
+set timescaledb.dist_copy_transfer_format = 'binary';
+\copy metrics_dist_io from 'dist_remote_error.text' with (format text);
+
+-- For testing, force the text format to exercise our broken output function.
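-- (With binary transfer disabled, the data node renders each value with its
-- output function; ioout() sometimes returns a string that does not parse as
-- an int, and the access node's tuplefactory has to report that cleanly.)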
+set timescaledb.enable_connection_binary_data = false; +set timescaledb.dist_copy_transfer_format = 'text'; + +set timescaledb.remote_data_fetcher = 'prepared'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_io; + +set timescaledb.remote_data_fetcher = 'copy'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_io; + +set timescaledb.remote_data_fetcher = 'cursor'; +explain (analyze, verbose, costs off, timing off, summary off) +select * from metrics_dist_io; + +-- cleanup +\c :TEST_DBNAME :ROLE_SUPERUSER; +DROP DATABASE :DATA_NODE_1; +DROP DATABASE :DATA_NODE_2; +DROP DATABASE :DATA_NODE_3; diff --git a/tsl/test/sql/include/dist_remote_error_setup.sql b/tsl/test/sql/include/dist_remote_error_setup.sql new file mode 100644 index 000000000..ac51d8d28 --- /dev/null +++ b/tsl/test/sql/include/dist_remote_error_setup.sql @@ -0,0 +1,122 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +-- bs is for "broken send", the type is int4 +create type bs; + +create or replace function bssend(bs) returns bytea + as :MODULE_PATHNAME, 'ts_debug_broken_int4send' + language c immutable strict parallel safe; + +create or replace function bsrecv(internal) returns bs as 'int4recv' language internal; + +create or replace function bsin(cstring) returns bs as 'int4in' language internal; + +create or replace function bsout(bs) returns cstring as 'int4out' language internal; + +create type bs(input = bsin, output = bsout, send = bssend, receive = bsrecv, + internallength = 4, passedbyvalue = true); + +create cast (int4 as bs) without function as implicit; + +create cast (bs as int4) without function as implicit; + +-- same but for broken recv +create type br; + +create or replace function brsend(br) returns bytea as 'int4send' language internal; + +create or replace function brrecv(internal) returns br + as :MODULE_PATHNAME, 'ts_debug_broken_int4recv' + language c immutable strict parallel safe; + +create or replace function brin(cstring) returns br as 'int4in' language internal; + +create or replace function brout(br) returns cstring as 'int4out' language internal; + +create type br(input = brin, output = brout, send = brsend, receive = brrecv, + internallength = 4, passedbyvalue = true); + +create cast (int4 as br) without function as implicit; + +create cast (br as int4) without function as implicit; + +-- recv that sleeps, optionally (want that only on one data node) +create type sr; + +create or replace function srsend(sr) returns bytea as 'int4send' language internal; + +\if :{?sleepy_sendrecv} +create or replace function srrecv(internal) returns sr + as :MODULE_PATHNAME, 'ts_debug_sleepy_int4recv' + language c immutable strict parallel safe; +\else +create or replace function srrecv(internal) returns sr as 'int4recv' language internal; +\endif + +create or replace function srin(cstring) returns sr as 'int4in' language internal; + +create or replace function srout(sr) returns cstring as 'int4out' language internal; + +create type sr(input = srin, output = srout, send = srsend, receive = srrecv, + internallength = 4, passedbyvalue = true); + +create cast (int4 as sr) without function as implicit; + +create cast (sr as int4) without function as implicit; + +-- send that sleeps, optionally (want that only on one data node) +create type ss; + +create or replace function ssrecv(internal) 
returns ss as 'int4recv' language internal;
+
+\if :{?sleepy_sendrecv}
+create or replace function sssend(ss) returns bytea
+  as :MODULE_PATHNAME, 'ts_debug_sleepy_int4send'
+  language c immutable strict parallel safe;
+\else
+create or replace function sssend(ss) returns bytea as 'int4send' language internal;
+\endif
+
+create or replace function ssin(cstring) returns ss as 'int4in' language internal;
+
+create or replace function ssout(ss) returns cstring as 'int4out' language internal;
+
+create type ss(input = ssin, output = ssout, send = sssend, receive = ssrecv,
+  internallength = 4, passedbyvalue = true);
+
+create cast (int4 as ss) without function as implicit;
+
+create cast (ss as int4) without function as implicit;
+
+-- int4out that sometimes outputs something that is not an int (the name
+-- abbreviates "incorrect out")
+create type io;
+
+create or replace function iorecv(internal) returns io as 'int4recv' language internal;
+
+create or replace function iosend(io) returns bytea as 'int4send' language internal;
+
+create or replace function ioin(cstring) returns io as 'int4in' language internal;
+
+create or replace function ioout(io) returns cstring
+  as :MODULE_PATHNAME, 'ts_debug_incorrect_int4out'
+  language c immutable strict parallel safe;
+
+create type io(input = ioin, output = ioout, send = iosend, receive = iorecv,
+  internallength = 4, passedbyvalue = true);
+
+create cast (int4 as io) without function as implicit;
+
+create cast (io as int4) without function as implicit;
+
+
+-- Create a function that raises an error every nth row.
+-- It's stable, takes a second argument and returns current number of rows,
+-- so that it is shipped to data nodes and not optimized out.
+create or replace function ts_debug_shippable_error_after_n_rows(integer, anyelement)
+  returns integer as :MODULE_PATHNAME language C stable strict;
+
+-- Same as above, but fatal.
+create or replace function ts_debug_shippable_fatal_after_n_rows(integer, anyelement)
+  returns integer as :MODULE_PATHNAME language C stable strict;
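-- Usage sketch for the two functions above (mirroring the queries in
-- dist_remote_error.sql.in): the second argument makes the call depend on the
-- scanned rows, so it ships to the data nodes instead of being evaluated once
-- on the access node. For example, this errors out on the data node after 701
-- rows have been seen in the current transaction:
--
--   select 1 from metrics_dist_remote_error
--   where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0;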