Add health check function

A new health check function _timescaledb_internal.health() returns the
health and status of the database instance, including any configured
data nodes (in case the instance is an access node).

Since the function returns also the health of the data nodes, it tries
hard to avoid throwing errors. An error will fail the whole function
and therefore not return any node statuses, although some of the nodes
might be healthy.

The health check on the data nodes is a recursive (remote) call to the
same function on those nodes. Unfortunately, the check will fail with
an error if a connection cannot be established to a node (or an error
occurs on the connection), which means the whole function call will
fail. This will be addressed in a future change by returning the error
in the function result instead.
This commit is contained in:
Erik Nordström 2022-09-01 17:26:41 +02:00 committed by Erik Nordström
parent 23c01c44e5
commit 4b05402580
13 changed files with 409 additions and 2 deletions

View File

@ -50,3 +50,7 @@ BEGIN
RAISE 'subscription sync wait timedout';
END
$BODY$ SET search_path TO pg_catalog, pg_temp;
CREATE OR REPLACE FUNCTION _timescaledb_internal.health() RETURNS
TABLE (node_name NAME, healthy BOOL, in_recovery BOOL, error TEXT)
AS '@MODULE_PATHNAME@', 'ts_health_check' LANGUAGE C VOLATILE;

View File

@ -224,3 +224,5 @@ ALTER TABLE _timescaledb_internal.bgw_policy_chunk_stats
ADD CONSTRAINT bgw_policy_chunk_stats_job_id_fkey
FOREIGN KEY(job_id) REFERENCES _timescaledb_config.bgw_job(id)
ON DELETE CASCADE;
DROP FUNCTION _timescaledb_internal.health;

View File

@ -123,6 +123,7 @@ CROSSMODULE_WRAPPER(dist_remote_hypertable_index_info);
CROSSMODULE_WRAPPER(distributed_exec);
CROSSMODULE_WRAPPER(create_distributed_restore_point);
CROSSMODULE_WRAPPER(hypertable_distributed_set_replication_factor);
CROSSMODULE_WRAPPER(health_check);
TS_FUNCTION_INFO_V1(ts_dist_set_id);
Datum
@ -540,6 +541,7 @@ TSDLLEXPORT CrossModuleFunctions ts_cm_functions_default = {
.chunk_create_replica_table = error_no_default_fn_pg_community,
.hypertable_distributed_set_replication_factor = error_no_default_fn_pg_community,
.update_compressed_chunk_relstats = update_compressed_chunk_relstats_default,
.health_check = error_no_default_fn_pg_community,
};
TSDLLEXPORT CrossModuleFunctions *ts_cm_functions = &ts_cm_functions_default;

View File

@ -202,6 +202,7 @@ typedef struct CrossModuleFunctions
TupleTableSlot *(*compress_row_exec)(CompressSingleRowState *cr, TupleTableSlot *slot);
void (*compress_row_end)(CompressSingleRowState *cr);
void (*compress_row_destroy)(CompressSingleRowState *cr);
PGFunction health_check;
} CrossModuleFunctions;
extern TSDLLEXPORT CrossModuleFunctions *ts_cm_functions;

View File

@ -50,6 +50,7 @@
#include "remote/dist_commands.h"
#include "remote/dist_copy.h"
#include "remote/dist_txn.h"
#include "remote/healthcheck.h"
#include "remote/txn_id.h"
#include "remote/txn_resolve.h"
#include "reorder.h"
@ -230,6 +231,7 @@ CrossModuleFunctions tsl_cm_functions = {
.hypertable_distributed_set_replication_factor = hypertable_set_replication_factor,
.cache_syscache_invalidate = cache_syscache_invalidate,
.update_compressed_chunk_relstats = update_compressed_chunk_relstats,
.health_check = ts_dist_health_check,
};
static void

View File

@ -10,6 +10,7 @@ set(SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/dist_copy.c
${CMAKE_CURRENT_SOURCE_DIR}/dist_ddl.c
${CMAKE_CURRENT_SOURCE_DIR}/copy_fetcher.c
${CMAKE_CURRENT_SOURCE_DIR}/healthcheck.c
${CMAKE_CURRENT_SOURCE_DIR}/stmt_params.c
${CMAKE_CURRENT_SOURCE_DIR}/tuplefactory.c
${CMAKE_CURRENT_SOURCE_DIR}/txn.c

View File

@ -0,0 +1,313 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#include <postgres.h>
#include <funcapi.h>
#include <utils/builtins.h>
#include <utils/lsyscache.h>
#include <utils/syscache.h>
#include "healthcheck.h"
#include "data_node.h"
#include "dist_commands.h"
#include "dist_util.h"
/*
* Functions and data structures for printing a health check result.
*/
enum Anum_show_conn
{
Anum_health_node_name = 1,
Anum_health_healthy,
Anum_health_in_recovery,
Anum_health_error,
_Anum_health_max,
};
#define Natts_health (_Anum_health_max - 1)
/*
* Health check result.
*
* Produces a tuple with four columns:
*
* 1. Node name (NULL for the access node itself)
*
* 2. A "healthy" boolean. An access node returns "false" in case a data node
* is not responding.
*
* 3. A boolean telling if the node is in recovery. Note that the "healthy"
* status will be "true" while the node is in recovery.
*
* 4. An optional error message explaining the reason for "healthy" being
* false.
*/
static HeapTuple
create_health_check_tuple(const char *data_node, bool in_recovery, TupleDesc tupdesc)
{
Datum values[Natts_health];
bool nulls[Natts_health];
memset(nulls, true, sizeof(nulls));
if (NULL != data_node)
{
NameData *node_name = palloc(sizeof(NameData));
namestrcpy(node_name, data_node);
values[AttrNumberGetAttrOffset(Anum_health_node_name)] = NameGetDatum(node_name);
nulls[AttrNumberGetAttrOffset(Anum_health_node_name)] = false;
}
values[AttrNumberGetAttrOffset(Anum_health_healthy)] = BoolGetDatum(in_recovery ? false : true);
nulls[AttrNumberGetAttrOffset(Anum_health_healthy)] = false;
values[AttrNumberGetAttrOffset(Anum_health_in_recovery)] = BoolGetDatum(in_recovery);
nulls[AttrNumberGetAttrOffset(Anum_health_in_recovery)] = false;
return heap_form_tuple(tupdesc, values, nulls);
}
static IOFuncSelector
get_io_func_selector_from_format(int format)
{
switch (format)
{
case 0: /* text format */
return IOFunc_input;
case 1: /* binary format */
return IOFunc_receive;
default:
/* reserved for future, so likely won't happen */
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("unexpected format of data node response")));
}
pg_unreachable();
/* Silence Windows compiler */
return IOFunc_input;
}
static void
fill_in_result_error(Datum values[Natts_health], bool nulls[Natts_health], const char *errormsg)
{
values[AttrNumberGetAttrOffset(Anum_health_error)] = CStringGetTextDatum(errormsg);
nulls[AttrNumberGetAttrOffset(Anum_health_error)] = false;
}
Datum
ts_dist_health_check(PG_FUNCTION_ARGS)
{
DistCmdResult *result;
FuncCallContext *funcctx;
HeapTuple tuple = NULL;
if (SRF_IS_FIRSTCALL())
{
MemoryContext oldcontext;
TupleDesc tupdesc;
funcctx = SRF_FIRSTCALL_INIT();
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("function returning record called in context "
"that cannot accept type record")));
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
if (dist_util_membership() == DIST_MEMBER_ACCESS_NODE)
{
List *data_node_list;
StringInfo cmd = makeStringInfo();
Oid fnamespaceid = get_func_namespace(fcinfo->flinfo->fn_oid);
appendStringInfo(cmd,
"SELECT * FROM %s.%s()",
get_namespace_name(fnamespaceid),
get_func_name(fcinfo->flinfo->fn_oid));
data_node_list = data_node_get_node_name_list();
result = ts_dist_cmd_invoke_on_data_nodes_using_search_path(cmd->data,
NULL,
data_node_list,
true);
funcctx->user_fctx = result;
list_free(data_node_list);
}
MemoryContextSwitchTo(oldcontext);
}
funcctx = SRF_PERCALL_SETUP();
switch (dist_util_membership())
{
case DIST_MEMBER_ACCESS_NODE:
result = funcctx->user_fctx;
unsigned int call_cnt = funcctx->call_cntr;
if (call_cnt == 0)
{
/* On call count 0, produce a tuple for health of AN itself */
tuple = create_health_check_tuple(NULL, RecoveryInProgress(), funcctx->tuple_desc);
}
else if (result == NULL)
{
/* No result from data nodes, so nothing to do. This probably
* cannot happen in practice. */
SRF_RETURN_DONE(funcctx);
}
else if (call_cnt > ts_dist_cmd_response_count(result))
{
/* No more responses so no more tuples to produce */
ts_dist_cmd_close_response(result);
funcctx->user_fctx = NULL;
SRF_RETURN_DONE(funcctx);
}
else
{
/*
* Produce a tuple from a data node's response.
*
* TODO: Currently, the remote commands to data nodes will
* either succeed or throw an error if one of the data nodes
* cannot be contacted. Therefore, the access node will never
* produce a result showing a data node as unhealthy (unless a
* node is in recovery). This needs to be changed so that
* connection issues (or errors from data node) won't result
* in throwing an error here. Instead, the result should be
* "unhealthy" with the appropriate error string if available.
*/
const char *node_name = "";
NameData data_node_name;
PGresult *pgres = ts_dist_cmd_get_result_by_index(result, call_cnt - 1, &node_name);
TupleDesc return_tupdesc = funcctx->tuple_desc;
Datum values[Natts_health];
bool nulls[Natts_health];
memset(nulls, true, sizeof(nulls));
namestrcpy(&data_node_name, node_name);
nulls[AttrNumberGetAttrOffset(Anum_health_node_name)] = false;
values[AttrNumberGetAttrOffset(Anum_health_node_name)] =
NameGetDatum(&data_node_name);
if (PQresultStatus(pgres) != PGRES_TUPLES_OK)
{
values[AttrNumberGetAttrOffset(Anum_health_error)] =
CStringGetTextDatum(PQresultErrorMessage(pgres));
nulls[AttrNumberGetAttrOffset(Anum_health_error)] = false;
}
else if (PQnfields(pgres) != funcctx->tuple_desc->natts)
{
StringInfo error = makeStringInfo();
appendStringInfo(error,
"unexpected number of fields in data node response (%d vs %d) "
"%s",
PQnfields(pgres),
funcctx->tuple_desc->natts,
PQgetvalue(pgres, 0, 0));
fill_in_result_error(values, nulls, error->data);
}
else if (PQntuples(pgres) != 1)
{
StringInfo error = makeStringInfo();
appendStringInfo(error,
"unexpected number of rows in data node response (%d vs %d)",
PQntuples(pgres),
1);
fill_in_result_error(values, nulls, error->data);
}
else
{
int i;
for (i = 0; i < return_tupdesc->natts; i++)
{
/* Data nodes don't return their own name, and it was already filled in */
if (i == AttrNumberGetAttrOffset(Anum_health_node_name))
{
Assert(PQgetisnull(pgres, 0 /* row */, i /* column */) == 1);
}
else if (PQgetisnull(pgres, 0 /* row */, i /* column */) == 1)
{
nulls[i] = true;
}
else
{
Oid typid = PQftype(pgres, i);
int format = PQfformat(pgres, i);
IOFuncSelector iofuncsel = get_io_func_selector_from_format(format);
int16 typlen;
bool typbyval;
char typalign;
char typdelim;
Oid typioparam;
Oid typfuncid;
if (typid != return_tupdesc->attrs[i].atttypid)
{
StringInfo error = makeStringInfo();
appendStringInfo(error,
"unexpected field type in data node response %u "
"vs %u",
typid,
return_tupdesc->attrs[i].attrelid);
fill_in_result_error(values, nulls, error->data);
break;
}
get_type_io_data(typid,
iofuncsel,
&typlen,
&typbyval,
&typalign,
&typdelim,
&typioparam,
&typfuncid);
if (iofuncsel == IOFunc_receive)
{
StringInfo data = makeStringInfo();
appendBinaryStringInfo(data,
PQgetvalue(pgres, 0, i),
PQgetlength(pgres, 0, i));
values[i] = OidReceiveFunctionCall(typfuncid,
data,
typioparam,
PQfmod(pgres, i));
}
else
{
Assert(iofuncsel == IOFunc_input);
values[i] = OidInputFunctionCall(typfuncid,
PQgetvalue(pgres, 0, i),
typioparam,
PQfmod(pgres, i));
}
nulls[i] = false;
}
}
}
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
}
break;
case DIST_MEMBER_DATA_NODE:
case DIST_MEMBER_NONE:
if (funcctx->call_cntr > 0)
SRF_RETURN_DONE(funcctx);
tuple = create_health_check_tuple(NULL, RecoveryInProgress(), funcctx->tuple_desc);
break;
}
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
}

View File

@ -0,0 +1,13 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#ifndef TIMESCALEDB_TSL_REMOTE_HEALTHCHECK_H
#include <postgres.h>
#include <fmgr.h>
extern Datum ts_dist_health_check(PG_FUNCTION_ARGS);
#endif /* TIMESCALEDB_TSL_REMOTE_HEALTHCHECK_H */

View File

@ -330,6 +330,16 @@ CALL distributed_exec('SELECT 1');
ERROR: function must be run on the access node only
\set ON_ERROR_STOP 1
\c :TEST_DBNAME :ROLE_SUPERUSER
-- Test health check function output on access node
SELECT * FROM _timescaledb_internal.health() ORDER BY 1 NULLS FIRST;
node_name | healthy | in_recovery | error
--------------------+---------+-------------+-------
| t | f |
db_dist_commands_1 | t | f |
db_dist_commands_2 | t | f |
db_dist_commands_3 | t | f |
(4 rows)
SELECT * FROM delete_data_node(:'DATA_NODE_1');
delete_data_node
------------------
@ -348,6 +358,13 @@ SELECT * FROM delete_data_node(:'DATA_NODE_3');
t
(1 row)
-- Test health check when no longer an access node (no data nodes)
SELECT * FROM _timescaledb_internal.health() ORDER BY 1 NULLS FIRST;
node_name | healthy | in_recovery | error
-----------+---------+-------------+-------
| t | f |
(1 row)
DROP DATABASE :DATA_NODE_1;
DROP DATABASE :DATA_NODE_2;
DROP DATABASE :DATA_NODE_3;

View File

@ -81,6 +81,7 @@ ORDER BY pronamespace::regnamespace::text COLLATE "C", p.oid::regprocedure::text
_timescaledb_internal.get_partition_for_key(anyelement)
_timescaledb_internal.get_partition_hash(anyelement)
_timescaledb_internal.get_time_type(integer)
_timescaledb_internal.health()
_timescaledb_internal.hist_combinefunc(internal,internal)
_timescaledb_internal.hist_deserializefunc(bytea,internal)
_timescaledb_internal.hist_finalfunc(internal,double precision,double precision,double precision,integer)

View File

@ -172,9 +172,17 @@ CALL distributed_exec('SELECT 1');
\set ON_ERROR_STOP 1
\c :TEST_DBNAME :ROLE_SUPERUSER
-- Test health check function output on access node
SELECT * FROM _timescaledb_internal.health() ORDER BY 1 NULLS FIRST;
SELECT * FROM delete_data_node(:'DATA_NODE_1');
SELECT * FROM delete_data_node(:'DATA_NODE_2');
SELECT * FROM delete_data_node(:'DATA_NODE_3');
-- Test health check when no longer an access node (no data nodes)
SELECT * FROM _timescaledb_internal.health() ORDER BY 1 NULLS FIRST;
DROP DATABASE :DATA_NODE_1;
DROP DATABASE :DATA_NODE_2;
DROP DATABASE :DATA_NODE_3;

View File

@ -0,0 +1,42 @@
# This file and its contents are licensed under the Timescale License.
# Please see the included NOTICE for copyright information and
# LICENSE-TIMESCALE for a copy of the license.
use strict;
use warnings;
use AccessNode;
use DataNode;
use TestLib;
use Test::More tests => 4;
#Initialize all the multi-node instances
my $an = AccessNode->create('an');
my $dn1 = DataNode->create('dn1');
my $dn2 = DataNode->create('dn2');
$an->add_data_node($dn1);
$an->add_data_node($dn2);
$an->psql_is(
'postgres',
'SELECT * FROM _timescaledb_internal.health() ORDER BY 1 NULLS FIRST',
q[|t|f|
dn1|t|f|
dn2|t|f|], 'Health check shows healthy AN and two healthy DNs');
# Stop a data node to simulate failure
$dn1->stop('fast');
# Health check will currently fail with an error when a data node
# cannot be contacted. This should be fixed so that the health check
# instead returns a negative status for the node.
my ($ret, $stdout, $stderr) = $an->psql('postgres',
'SELECT * FROM _timescaledb_internal.health() ORDER BY 1 NULLS FIRST;');
# psql return error code 3 in case of failure in script
is($ret, qq(3), "expect error code 3 due to failed data node");
done_testing();
1;

View File

@ -1,6 +1,7 @@
set(PROVE_TEST_FILES 001_simple_multinode.pl 003_connections_privs.pl)
set(PROVE_DEBUG_TEST_FILES 002_chunk_copy_move.pl 004_multinode_rdwr_1pc.pl
005_add_data_node.pl 006_job_crash_log.pl)
set(PROVE_DEBUG_TEST_FILES
002_chunk_copy_move.pl 004_multinode_rdwr_1pc.pl 005_add_data_node.pl
006_job_crash_log.pl 007_healthcheck.pl)
if(CMAKE_BUILD_TYPE MATCHES Debug)
list(APPEND PROVE_TEST_FILES ${PROVE_DEBUG_TEST_FILES})