Enable 1PC and DN reads for read only AN txns

In high availability setups, a streaming replica of the access node can
be configured. Since it is a hot standby, users should be able to run
read-only queries from this replica, including queries against the
distributed hypertables.

Additionally, the internal code which sets up a connection from the
standby access node to the data nodes now marks the ongoing transaction
explicitly as READ ONLY. This ensures that any function or activity
that tries to make modifications on the data nodes errors
out. Note that even if "timescaledb.enable_2pc" is enabled, we will
still use 1PC for these READ ONLY transactions.

While we are doing this, we also handle the case when a user explicitly
marks a transaction as "READ ONLY" on the primary AN. Even in that
case the connection to the DNs will be marked read only. Additionally,
similar to the above standby case, even if "timescaledb.enable_2pc" is
enabled, we will still use 1PC for these READ ONLY transactions on the
primary AN.

In a normal Postgres standby, the ongoing transaction becomes READ WRITE
immediately on promotion, whereas in our case the ongoing transaction
will remain READ ONLY until completion. New transactions on the same
session will be READ WRITE as expected.
This commit is contained in:
Nikhil 2021-05-24 14:48:58 +05:30
parent 449019f3c1
commit 1aabbc83f8
6 changed files with 256 additions and 10 deletions

View File

@ -2,12 +2,14 @@
# "local" is for Unix domain socket connections only
local all all trust
local replication all trust
# IPv4 local connections:
hostssl all @TEST_ROLE_CLUSTER_SUPERUSER@ 127.0.0.1/32 cert clientcert=verify-full
hostssl all @TEST_ROLE_1@ 127.0.0.1/32 cert clientcert=verify-full
host all @TEST_ROLE_2@ 127.0.0.1/32 password
host all @TEST_ROLE_3@ 127.0.0.1/32 password
host all all 127.0.0.1/32 trust
host replication all 127.0.0.1/32 trust
# IPv6 local connections:
hostssl all @TEST_ROLE_CLUSTER_SUPERUSER@ ::1/128 cert clientcert=verify-full
hostssl all @TEST_ROLE_1@ ::1/128 cert clientcert=verify-full

View File

@ -125,11 +125,11 @@ create_distributed_restore_point(PG_FUNCTION_ARGS)
* inconsistent state when the distributed database is restored from a backup
* using the restore point.
*
* To do that we take an exclusive lock on the remote transaction
* To do that we take an access exclusive lock on the remote transaction
* table, which will force any concurrent transaction
* wait during their PREPARE phase.
*/
LockRelationOid(ts_catalog_get()->tables[REMOTE_TXN].id, ExclusiveLock);
LockRelationOid(ts_catalog_get()->tables[REMOTE_TXN].id, AccessExclusiveLock);
/* Prevent situation when new data node added during the execution */
LockRelationOid(ForeignServerRelationId, ExclusiveLock);

View File

@ -5,15 +5,17 @@
*/
#include <postgres.h>
#include <access/htup_details.h>
#include <access/xact.h>
#include <access/xlog.h>
#include <storage/lmgr.h>
#include <utils/hsearch.h>
#include <utils/builtins.h>
#include <utils/memutils.h>
#include <utils/syscache.h>
#include "dist_txn.h"
#include "catalog.h"
#include "connection.h"
#include "async.h"
#include "errors.h"
#include "txn.h"
#include "txn_store.h"
#include "guc.h"
@ -126,10 +128,21 @@ static void
dist_txn_xact_callback_1pc_pre_commit()
{
RemoteTxn *remote_txn;
Catalog *catalog = ts_catalog_get();
AsyncRequestSet *ars = async_request_set_create();
eventcallback(DTXN_EVENT_PRE_COMMIT);
/*
* In 1PC, we don't need to add entries to the remote_txn table. However
* we do need to take a SHARE lock on it to interlock with any distributed
* restore point activity that might be happening in parallel.
*
* The catalog table lock is kept until the transaction completes in order to
* synchronize with distributed restore point creation
*/
LockRelationOid(catalog->tables[REMOTE_TXN].id, AccessShareLock);
/* send a commit to all connections */
remote_txn_store_foreach(store, remote_txn)
{
@ -474,11 +487,39 @@ dist_txn_xact_callback_2pc(XactEvent event, void *arg)
static void
dist_txn_xact_callback(XactEvent event, void *arg)
{
bool use_2pc;
char *xactReadOnly;
/* Quick exit if no connections were touched in this transaction. */
if (store == NULL)
return;
if (ts_guc_enable_2pc)
/*
* Windows MSVC builds have linking issues for GUC variables from postgres for
* use inside this extension. So we use GetConfigOptionByName
*/
xactReadOnly = GetConfigOptionByName("transaction_read_only", NULL, false);
/*
* The decision to use 2PC rests on multiple factors:
*
* 1) if ts_guc_enable_2pc is enabled and it's a regular backend use it
*
* 2) if ts_guc_enable_2pc is enabled but we are running a read only txn, don't use it
*
* We might be tempted to use 1PC if just one DN is involved in the transaction.
* However, it's possible that a transaction which involves data on AN and the one DN could get
* a failure at the end of the COMMIT processing on the AN due to issues in local AN data. In
* such a case since we send a COMMIT at "XACT_EVENT_PRE_COMMIT" event time to the DN, we might
* end up with a COMMITTED DN but an aborted AN! Hence this optimization is not possible to
* guarantee transactional semantics.
*/
use_2pc = (ts_guc_enable_2pc && strncmp(xactReadOnly, "on", sizeof("on")) != 0);
#ifdef TS_DEBUG
ereport(DEBUG3, (errmsg("use 2PC: %s", use_2pc ? "true" : "false")));
#endif
if (use_2pc)
dist_txn_xact_callback_2pc(event, arg);
else
dist_txn_xact_callback_1pc(event, arg);

View File

@ -83,20 +83,51 @@ remote_txn_begin(RemoteTxn *entry, int curlevel)
/* Start main transaction if we haven't yet */
if (xact_depth == 0)
{
const char *sql;
StringInfoData sql;
char *xactReadOnly;
Assert(remote_connection_get_status(entry->conn) == CONN_IDLE);
elog(DEBUG3, "starting remote transaction on connection %p", entry->conn);
initStringInfo(&sql);
appendStringInfo(&sql, "%s", "START TRANSACTION ISOLATION LEVEL");
if (IsolationIsSerializable())
sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
appendStringInfo(&sql, "%s", " SERIALIZABLE");
else
sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
appendStringInfo(&sql, "%s", " REPEATABLE READ");
/*
* Windows MSVC builds have linking issues for GUC variables from postgres for
* use inside this extension. So we use GetConfigOptionByName
*/
xactReadOnly = GetConfigOptionByName("transaction_read_only", NULL, false);
/*
* If we are initiating connection from a standby (of an AN for example),
* then the remote connection transaction needs to be also set up as a
* READ ONLY one. This will catch any commands that are sent from the
* read only AN to datanodes but which could have potential read-write
* side effects on data nodes.
*
* Note that when the STANDBY gets promoted then the ongoing transaction
* will remain READ ONLY till its completion. New transactions will be
* suitably READ WRITE. This is a slight change in behavior as compared to
* regular Postgres, but promotion is not a routine activity, so it should
* be acceptable and typically users would be reconnecting to the new
* promoted AN anyways.
*
* Note that the below will also handle the case when primary AN has a
* transaction which does an explicit "BEGIN TRANSACTION READ ONLY;". The
* treatment is the same, mark the remote DN transaction as READ ONLY
*/
if (strncmp(xactReadOnly, "on", sizeof("on")) == 0)
appendStringInfo(&sql, "%s", " READ ONLY");
remote_connection_xact_transition_begin(entry->conn);
remote_connection_cmd_ok(entry->conn, sql);
remote_connection_cmd_ok(entry->conn, sql.data);
remote_connection_xact_transition_end(entry->conn);
xact_depth = remote_connection_xact_depth_inc(entry->conn);
pfree(sql.data);
}
/* If the connection is in COPY mode, then exit out of it */
else if (remote_connection_get_status(entry->conn) == CONN_COPY_IN)

View File

@ -0,0 +1,172 @@
# This file and its contents are licensed under the Timescale License.
# Please see the included NOTICE for copyright information and
# LICENSE-TIMESCALE for a copy of the license.
# test a multi node cluster with read only queries from access node
# primary and standby nodes
use strict;
use warnings;
use AccessNode;
use DataNode;
use TestLib;
# NOTE(review): an explicit plan (tests => 15) combined with the
# done_testing() call at the bottom of this file can conflict in newer
# Test::More versions, and only 13 assertions appear visible below --
# verify the plan count matches the actual number of tests run.
use Test::More tests => 15;
#Initialize all the multi-node instances
my $an = AccessNode->get_new_node('an');
# allows_streaming sets the node up so a physical streaming standby can
# be created from its backup; the extra auth entry is presumably needed
# for the replication connection -- confirm against the node helper docs.
$an->init(
allows_streaming => 1,
auth_extra => [ '--create-role', 'repl_role' ]);
$an->start;
$an->safe_psql('postgres', 'CREATE EXTENSION timescaledb');
my $backup_name = 'my_backup';
# Take backup
$an->backup($backup_name);
# Create streaming standby linking to master
my $an_standby = AccessNode->get_new_node('an_standby_1');
$an_standby->init_from_backup($an, $backup_name, has_streaming => 1);
$an_standby->start;
#Initialize and set up data nodes now
my $dn1 = DataNode->create('dn1');
my $dn2 = DataNode->create('dn2');
$an->add_data_node($dn1);
$an->add_data_node($dn2);
#Create a distributed hypertable and insert a few rows
$an->safe_psql(
'postgres',
qq[
    CREATE TABLE test(time timestamp NOT NULL, device int, temp float);
    SELECT create_distributed_hypertable('test', 'time', 'device', 3);
    INSERT INTO test SELECT t, (abs(timestamp_hash(t::timestamp)) % 10) + 1, 0.10 FROM generate_series('2018-03-02 1:00'::TIMESTAMPTZ, '2018-03-08 1:00', '1 hour') t;
]);
my $query = qq[
CREATE OR REPLACE FUNCTION read_write_function()
RETURNS VOID
LANGUAGE plpgsql AS
\$func\$
BEGIN
CREATE TABLE t_rd_wr(
id serial PRIMARY KEY,
customerid int,
daterecorded date,
value double precision
);
END
\$func\$;
];
# Create a function which does READ WRITE activity on the datanode
$an->safe_psql('postgres', "$query; CALL distributed_exec('$query');");
#Allow standby to catch up with the primary
$an->wait_for_catchup($an_standby, 'replay');
#Check that chunks are shown appropriately from the AN standby node
$query = q[SELECT * from show_chunks('test');];
#Query Access Standby node
$an_standby->psql_is(
'postgres', $query, q[_timescaledb_internal._dist_hyper_1_1_chunk
_timescaledb_internal._dist_hyper_1_2_chunk
_timescaledb_internal._dist_hyper_1_3_chunk
_timescaledb_internal._dist_hyper_1_4_chunk],
'AN Standby shows correct set of chunks');
#Check that SELECT queries work ok from the AN standby node
my $result = $an_standby->safe_psql('postgres', "SELECT count(*) FROM test");
is($result, qq(145), 'streamed content on AN standby');
# Check that only READ-only queries can run on AN standby node
# psql is expected to exit with status 3 when the statement fails
# (error while running a script) -- see psql exit-status documentation.
my ($ret, $stdout, $stderr) =
$an_standby->psql('postgres', 'INSERT INTO test(time) VALUES (now())');
is($ret, qq(3), "failed as expected");
like(
$stderr,
qr/cannot execute INSERT in a read-only transaction/,
"read only message as expected");
# Check that queries which connect to datanodes also remain read only;
# the error here must come from the data node side, since the remote
# transaction is opened READ ONLY by the access node.
($ret, $stdout, $stderr) =
$an_standby->psql('postgres',
'CALL distributed_exec($$ CREATE USER testrole WITH LOGIN $$)');
is($ret, qq(3), "failed as expected");
like(
$stderr,
qr/cannot execute CREATE ROLE in a read-only transaction/,
"read only message for DNs as expected");
# Check that function doing read write activity doesn't work
($ret, $stdout, $stderr) =
$an_standby->psql('postgres',
'CALL distributed_exec($$ SELECT read_write_function() $$)');
is($ret, qq(3), "failed as expected");
like(
$stderr,
qr/cannot execute CREATE TABLE in a read-only transaction/,
"read only message for DNs as expected");
# client_min_messages=debug3 makes the extension's "use 2PC: ..."
# diagnostic (emitted from the dist_txn commit callback in debug builds)
# visible on stderr, so the checks below can match it.
$an->append_conf(
'postgresql.conf', qq[
client_min_messages = 'debug3'
]);
$an->restart;
# Check that AN primary uses 2PC for read write transactions when multiple DNs
# are involved
($ret, $stdout, $stderr) =
$an->psql('postgres',
'BEGIN TRANSACTION READ WRITE; SELECT count(*) FROM TEST; ROLLBACK;');
like(
$stderr,
qr/use 2PC: true/,
"read write transaction uses 2PC with 2DNs on AN as expected");
# Check that AN primary uses 2PC for read write transactions even when SINGLE DN
# is involved
($ret, $stdout, $stderr) = $an->psql('postgres',
'CALL distributed_exec($$ CREATE USER testrole WITH LOGIN $$, node_list => \'{ "dn1" }\'); ROLLBACK;'
);
like(
$stderr,
qr/use 2PC: true/,
"read write transaction uses 2PC even with ONE DN from AN as expected");
# Check that AN primary uses 1PC for READ ONLY SERIALIZABLE transactions even when
# multiple DNs are involved
($ret, $stdout, $stderr) = $an->psql('postgres',
'BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY; SELECT count(*) FROM TEST; ROLLBACK;'
);
like(
$stderr,
qr/use 2PC: false/,
"read only serializable transaction uses 1PC on AN as expected");
# Check that AN primary uses 1PC for READ ONLY transactions even when multiple DNs
# are involved
($ret, $stdout, $stderr) =
$an->psql('postgres',
'BEGIN TRANSACTION READ ONLY; SELECT count(*) FROM TEST; ROLLBACK;');
like(
$stderr,
qr/use 2PC: false/,
"read only transaction uses 1PC on AN as expected");
# Check that standby can do READ WRITE queries post promotion.
# These are new sessions/transactions, so they become READ WRITE
# immediately after promote (only a transaction already in progress
# would stay READ ONLY until it completes).
$an_standby->promote;
$an_standby->safe_psql('postgres', 'INSERT INTO test(time) VALUES (now())');
$result = $an_standby->safe_psql('postgres', "SELECT count(*) FROM test");
is($result, qq(146), 'READ WRITE content on AN standby');
# Read write function should also work now
$an_standby->safe_psql('postgres',
'CALL distributed_exec($$ SELECT read_write_function() $$)');
done_testing();
1;

View File

@ -1,5 +1,5 @@
set(PROVE_TEST_FILES 001_simple_multinode.pl 003_connections.pl)
set(PROVE_DEBUG_TEST_FILES 002_chunk_copy_move.pl)
set(PROVE_DEBUG_TEST_FILES 002_chunk_copy_move.pl 004_multinode_rdwr_1pc.pl)
if(CMAKE_BUILD_TYPE MATCHES Debug)
list(APPEND PROVE_TEST_FILES ${PROVE_DEBUG_TEST_FILES})