diff --git a/src/box/applier.cc b/src/box/applier.cc index 21d2e6bcb90066d0f510641489ab9d1130bfae88..512d05dfa1df4149d65fa141fed94126cf650231 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -391,6 +391,7 @@ applier_subscribe(struct applier *applier) struct ibuf *ibuf = &applier->ibuf; struct xrow_header row; struct vclock remote_vclock_at_subscribe; + struct tt_uuid cluster_id = uuid_nil; xrow_encode_subscribe_xc(&row, &REPLICASET_UUID, &INSTANCE_UUID, &replicaset.vclock); @@ -408,9 +409,26 @@ applier_subscribe(struct applier *applier) /* * In case of successful subscribe, the server * responds with its current vclock. + * + * Tarantool > 2.1.1 also sends its cluster id to + * the replica, and replica has to check whether + * its and master's cluster ids match. */ vclock_create(&remote_vclock_at_subscribe); - xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe); + xrow_decode_subscribe_response_xc(&row, + &cluster_id, + &remote_vclock_at_subscribe); + /* + * If master didn't send us its cluster id + * assume that it has done all the checks. + * In this case cluster_id will remain zero. + */ + if (!tt_uuid_is_nil(&cluster_id) && + !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) { + tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH, + tt_uuid_str(&cluster_id), + tt_uuid_str(&REPLICASET_UUID)); + } } /* * Tarantool < 1.6.7: diff --git a/src/box/box.cc b/src/box/box.cc index 8892d0f0e3e41f19b2650e92d71186b374ca79a3..e12a1cba423b6fbfd85be48159edc772054f9402 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1584,18 +1584,6 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header) /* Check permissions */ access_check_universe_xc(PRIV_R); - /** - * Check that the given UUID matches the UUID of the - * replica set this replica belongs to. Used to handshake - * replica connect, and refuse a connection from a replica - * which belongs to a different replica set. - */ - if (!tt_uuid_is_equal(&replicaset_uuid, &REPLICASET_UUID)) { - tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH, - tt_uuid_str(&REPLICASET_UUID), - tt_uuid_str(&replicaset_uuid)); - } - /* Check replica uuid */ struct replica *replica = replica_by_uuid(&replica_uuid); if (replica == NULL || replica->id == REPLICA_ID_NIL) { @@ -1620,9 +1608,17 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header) * Send a response to SUBSCRIBE request, tell * the replica how many rows we have in stock for it, * and identify ourselves with our own replica id. + * + * Tarantool > 2.1.1 master doesn't check that replica + * has the same cluster id. Instead it sends its cluster + * id to replica, and replica checks that its cluster id + * matches master's one. Older versions will just ignore + * the additional field. */ struct xrow_header row; - xrow_encode_vclock_xc(&row, &replicaset.vclock); + xrow_encode_subscribe_response_xc(&row, + &REPLICASET_UUID, + &replicaset.vclock); /* * Identify the message with the replica id of this * instance, this is the only way for a replica to find diff --git a/src/box/xrow.c b/src/box/xrow.c index c4e3073be7917b41ebe6d086539027fb30152c57..ba3671fed819202fa208940589b1ecac5c19420e 100644 --- a/src/box/xrow.c +++ b/src/box/xrow.c @@ -1170,6 +1170,35 @@ xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock) return 0; } +int +xrow_encode_subscribe_response(struct xrow_header *row, + const struct tt_uuid *replicaset_uuid, + const struct vclock *vclock) +{ + memset(row, 0, sizeof(*row)); + size_t size = mp_sizeof_map(2) + + mp_sizeof_uint(IPROTO_VCLOCK) + mp_sizeof_vclock(vclock) + + mp_sizeof_uint(IPROTO_CLUSTER_UUID) + + mp_sizeof_str(UUID_STR_LEN); + char *buf = (char *) region_alloc(&fiber()->gc, size); + if (buf == NULL) { + diag_set(OutOfMemory, size, "region_alloc", "buf"); + return -1; + } + char *data = buf; + data = mp_encode_map(data, 2); + data = mp_encode_uint(data, IPROTO_VCLOCK); + data = mp_encode_vclock(data, vclock); + data = mp_encode_uint(data, IPROTO_CLUSTER_UUID); + data = xrow_encode_uuid(data, replicaset_uuid); + assert(data <= buf + size); + row->body[0].iov_base = buf; + row->body[0].iov_len = (data - buf); + row->bodycnt = 1; + row->type = IPROTO_OK; + return 0; +} + void xrow_encode_timestamp(struct xrow_header *row, uint32_t replica_id, double tm) { diff --git a/src/box/xrow.h b/src/box/xrow.h index 2654e35e6355de7113f02f285da7567cb1e56dbd..719add4f0df7dcad857ad70d193baa864ec45414 100644 --- a/src/box/xrow.h +++ b/src/box/xrow.h @@ -348,6 +348,37 @@ xrow_decode_vclock(struct xrow_header *row, struct vclock *vclock) return xrow_decode_subscribe(row, NULL, NULL, vclock, NULL); } +/** + * Encode a response to subscribe request. + * @param row[out] Row to encode into. + * @param replicaset_uuid. + * @param vclock. + * + * @retval 0 Success. + * @retval -1 Memory error. + */ +int +xrow_encode_subscribe_response(struct xrow_header *row, + const struct tt_uuid *replicaset_uuid, + const struct vclock *vclock); + +/** + * Decode a response to subscribe request. + * @param row Row to decode. + * @param[out] replicaset_uuid. + * @param[out] vclock. + * + * @retval 0 Success. + * @retval -1 Memory or format error. + */ +static inline int +xrow_decode_subscribe_response(struct xrow_header *row, + struct tt_uuid *replicaset_uuid, + struct vclock *vclock) +{ + return xrow_decode_subscribe(row, replicaset_uuid, NULL, vclock, NULL); +} + /** * Encode a heartbeat message. * @param row[out] Row to encode into. @@ -759,6 +790,26 @@ xrow_decode_vclock_xc(struct xrow_header *row, struct vclock *vclock) diag_raise(); } +/** @copydoc xrow_encode_subscribe_response. */ +static inline void +xrow_encode_subscribe_response_xc(struct xrow_header *row, + const struct tt_uuid *replicaset_uuid, + const struct vclock *vclock) +{ + if (xrow_encode_subscribe_response(row, replicaset_uuid, vclock) != 0) + diag_raise(); +} + +/** @copydoc xrow_decode_subscribe_response. */ +static inline void +xrow_decode_subscribe_response_xc(struct xrow_header *row, + struct tt_uuid *replicaset_uuid, + struct vclock *vclock) +{ + if (xrow_decode_subscribe_response(row, replicaset_uuid, vclock) != 0) + diag_raise(); +} + /** @copydoc iproto_reply_ok. */ static inline void iproto_reply_ok_xc(struct obuf *out, uint64_t sync, uint32_t schema_version) diff --git a/test/replication/misc.result b/test/replication/misc.result index c32681a7ada4c333c8bd2e7ad1ed94c1283d7b9c..ab827c50135cf328842c7e1ae1c8021489ef3e67 100644 --- a/test/replication/misc.result +++ b/test/replication/misc.result @@ -604,3 +604,64 @@ test_run:cmd("delete server replica") test_run:cleanup_cluster() --- ... +-- +-- gh-3704 move cluster id check to replica +-- +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'") +--- +- true +... +box.schema.user.grant("guest", "replication") +--- +... +test_run:cmd("start server replica") +--- +- true +... +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") +--- +- null +... +box.info.replication[2].downstream.status +--- +- follow +... +-- change master's cluster uuid and check that replica doesn't connect. +test_run:cmd("stop server replica") +--- +- true +... +_ = box.space._schema:replace{'cluster', tostring(uuid.new())} +--- +... +-- master believes replica is in cluster, but their cluster UUIDs differ. +test_run:cmd("start server replica") +--- +- true +... +test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0) +--- +- REPLICASET_UUID_MISMATCH +... +box.info.replication[2].downstream.status +--- +- stopped +... +test_run:cmd("stop server replica") +--- +- true +... +test_run:cmd("cleanup server replica") +--- +- true +... +test_run:cmd("delete server replica") +--- +- true +... +test_run:cleanup_cluster() +--- +... +box.schema.user.revoke('guest', 'replication') +--- +... diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua index 6a8af05c37661db5a9533d32c9bb7945d256c693..eda5310b6588a78044e94ba063781ec40f141ca9 100644 --- a/test/replication/misc.test.lua +++ b/test/replication/misc.test.lua @@ -243,3 +243,25 @@ test_run:cmd("stop server replica") test_run:cmd("cleanup server replica") test_run:cmd("delete server replica") test_run:cleanup_cluster() + +-- +-- gh-3704 move cluster id check to replica +-- +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'") +box.schema.user.grant("guest", "replication") +test_run:cmd("start server replica") +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") +box.info.replication[2].downstream.status +-- change master's cluster uuid and check that replica doesn't connect. +test_run:cmd("stop server replica") +_ = box.space._schema:replace{'cluster', tostring(uuid.new())} +-- master believes replica is in cluster, but their cluster UUIDs differ. +test_run:cmd("start server replica") +test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0) +box.info.replication[2].downstream.status + +test_run:cmd("stop server replica") +test_run:cmd("cleanup server replica") +test_run:cmd("delete server replica") +test_run:cleanup_cluster() +box.schema.user.revoke('guest', 'replication')