-
Notifications
You must be signed in to change notification settings - Fork 443
RATIS-2559. Add linearizable check for streaming read request #1490
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| */ | ||
| package org.apache.ratis.netty.server; | ||
|
|
||
| import org.apache.ratis.client.impl.OrderedAsync; | ||
| import org.apache.ratis.conf.RaftProperties; | ||
| import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; | ||
| import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; | ||
|
|
@@ -47,6 +48,7 @@ | |
|
|
||
| import static org.apache.ratis.client.impl.ClientProtoUtils.toRaftClientRequest; | ||
| import static org.apache.ratis.client.impl.ClientProtoUtils.toRaftClientReplyProto; | ||
| import static org.apache.ratis.netty.server.DataStreamManagement.newDataStreamReplyByteBuffer; | ||
| import static org.apache.ratis.netty.server.DataStreamManagement.replyDataStreamException; | ||
|
|
||
| public class ReadStreamManagement { | ||
|
|
@@ -60,24 +62,12 @@ static class ReadStream implements WritableByteChannel { | |
| private final DataStreamReplyByteBuffer terminalReply; | ||
| private long streamOffset; | ||
|
|
||
| ReadStream(RaftClientRequest request, long streamId, ChannelHandlerContext ctx) { | ||
| ReadStream(RaftClientRequest request, long streamId, ChannelHandlerContext ctx, RaftClientReply terminalReply) { | ||
| this.clientId = request.getClientId(); | ||
| this.streamId = streamId; | ||
| this.ctx = ctx; | ||
|
|
||
| final RaftClientReply reply = RaftClientReply.newBuilder() | ||
| .setRequest(request) | ||
| .setSuccess() | ||
| .build(); | ||
| this.terminalReply = DataStreamReplyByteBuffer.newBuilder() | ||
| .setClientId(clientId) | ||
| .setType(Type.STREAM_HEADER) | ||
| .setStreamId(streamId) | ||
| .setStreamOffset(0) | ||
| .setBuffer(toRaftClientReplyProto(reply).toByteString().asReadOnlyByteBuffer()) | ||
| .setSuccess(true) | ||
| .setBytesWritten(0) | ||
| .build(); | ||
| this.terminalReply = newReadStreamTerminalReply(clientId, streamId, terminalReply); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -186,17 +176,80 @@ private boolean processImpl(DataStreamRequestByteBuf requestBuf, ChannelHandlerC | |
| return true; | ||
| } | ||
|
|
||
| final ReadStream stream = new ReadStream(request, requestBuf.getStreamId(), ctx); | ||
| requestExecutor.execute(() -> { | ||
| final CompletableFuture<RaftClientReply> readOnlyCheck; | ||
| try { | ||
| readOnlyCheck = server.submitClientRequestAsync(newDummyReadRequest(request)); | ||
| } catch (IOException e) { | ||
| replyDataStreamException(server, e, request, requestBuf, ctx); | ||
| return true; | ||
| } | ||
|
|
||
| readOnlyCheck.whenCompleteAsync((reply, exception) -> { | ||
| if (exception != null) { | ||
| replyDataStreamException(server, exception, request, requestBuf, ctx); | ||
| return; | ||
| } | ||
|
|
||
| final RaftClientReply terminalReply = toReadStreamReply(request, reply); | ||
| if (!reply.isSuccess()) { | ||
| ctx.writeAndFlush(newDataStreamReplyByteBuffer(requestBuf, terminalReply)); | ||
| return; | ||
| } | ||
|
|
||
| final ReadStream stream = new ReadStream(request, requestBuf.getStreamId(), ctx, terminalReply); | ||
| try { | ||
| division.getStateMachine().data().query(request.getMessage(), stream); | ||
| } catch (Throwable t) { | ||
| LOG.error("{}: Failed read-only data stream query for {}", this, request, t); | ||
| } | ||
| }); | ||
| }, requestExecutor); | ||
| return true; | ||
| } | ||
|
|
||
| private static RaftClientRequest newDummyReadRequest(RaftClientRequest request) { | ||
| final RaftClientRequest.Builder builder = RaftClientRequest.newBuilder() | ||
| .setClientId(request.getClientId()) | ||
| .setGroupId(request.getRaftGroupId()) | ||
| .setCallId(request.getCallId()) | ||
| .setMessage(OrderedAsync.DUMMY) | ||
| .setType(request.getType()) | ||
| .setRepliedCallIds(request.getRepliedCallIds()) | ||
| .setSlidingWindowEntry(request.getSlidingWindowEntry()) | ||
| .setRoutingTable(request.getRoutingTable()) | ||
| .setTimeoutMs(request.getTimeoutMs()) | ||
| .setSpanContext(request.getSpanContext()); | ||
| if (request.isToLeader()) { | ||
| builder.setLeaderId(request.getServerId()); | ||
| } else { | ||
| builder.setServerId(request.getServerId()); | ||
| } | ||
| return builder.build(); | ||
|
Comment on lines
+210
to
+226
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add a set(RaftClientRequest request) to Builder to copy everything. |
||
| } | ||
|
|
||
| private static RaftClientReply toReadStreamReply(RaftClientRequest request, RaftClientReply reply) { | ||
| return RaftClientReply.newBuilder() | ||
| .setRequest(request) | ||
| .setSuccess(reply.isSuccess()) | ||
| .setException(reply.getException()) | ||
| .setLogIndex(reply.getLogIndex()) | ||
| .setCommitInfos(reply.getCommitInfos()) | ||
| .build(); | ||
| } | ||
|
|
||
| private static DataStreamReplyByteBuffer newReadStreamTerminalReply( | ||
| ClientId clientId, long streamId, RaftClientReply reply) { | ||
| return DataStreamReplyByteBuffer.newBuilder() | ||
| .setClientId(clientId) | ||
| .setType(Type.STREAM_HEADER) | ||
| .setStreamId(streamId) | ||
| .setStreamOffset(0) | ||
| .setBuffer(toRaftClientReplyProto(reply).toByteString().asReadOnlyByteBuffer()) | ||
| .setSuccess(reply.isSuccess()) | ||
| .setBytesWritten(0) | ||
| .setCommitInfos(reply.getCommitInfos()) | ||
| .build(); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return name; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1113,11 +1113,12 @@ private CompletableFuture<RaftClientReply> readAsync(RaftClientRequest request) | |
| if (request.getType().getRead().getPreferNonLinearizable() | ||
| || readOption == RaftServerConfigKeys.Read.Option.DEFAULT) { | ||
| final CompletableFuture<RaftClientReply> reply = checkLeaderState(request); | ||
| if (reply != null) { | ||
| return reply; | ||
| } | ||
| return queryStateMachine(request); | ||
| } else if (readOption == RaftServerConfigKeys.Read.Option.LINEARIZABLE){ | ||
| if (reply != null) { | ||
| return reply; | ||
| } | ||
| return isDummyRead(request) ? CompletableFuture.completedFuture(newSuccessReply(request)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to learn the context, why do we need this dummy read in Ratis streaming read while we do not have it for Ratis streaming write?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is to run the linearizable check; see #1469 (comment) |
||
| : queryStateMachine(request); | ||
|
Comment on lines
+1119
to
+1120
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add a DUMMY_SUCCESS_REPLY and move it to queryStateMachine: static final CompletableFuture<RaftClientReply> DUMMY_SUCCESS_REPLY
= CompletableFuture.completedFuture(RaftClientReply.newBuilder().setSuccess().build()); CompletableFuture<RaftClientReply> queryStateMachine(RaftClientRequest request) {
if (request.getType().getRead().getDummy()) {
return DUMMY_SUCCESS_REPLY;
}
return processQueryFuture(stateMachine.query(request.getMessage()), request);
} |
||
| } else if (readOption == RaftServerConfigKeys.Read.Option.LINEARIZABLE) { | ||
| final LeaderStateImpl leader = role.getLeaderState().orElse(null); | ||
| final CompletableFuture<Long> replyFuture; | ||
| if (leader != null) { | ||
|
|
@@ -1136,12 +1137,17 @@ private CompletableFuture<RaftClientReply> readAsync(RaftClientRequest request) | |
| return replyFuture | ||
| .thenCompose(readIndex -> getState().getReadRequests().waitToAdvance(readIndex, | ||
| () -> getReadException("add", snapshotInstallationHandler.getInProgressInstallSnapshotIndex(), false))) | ||
| .thenCompose(readIndex -> queryStateMachine(request)) | ||
| .thenCompose(readIndex -> isDummyRead(request) | ||
| ? CompletableFuture.completedFuture(newSuccessReply(request)) : queryStateMachine(request)) | ||
| .exceptionally(e -> readException2Reply(request, e)); | ||
| } else { | ||
| throw new IllegalStateException("Unexpected read option: " + readOption); | ||
| } | ||
| } | ||
| private static boolean isDummyRead(RaftClientRequest request) { | ||
| return request.getMessage() != null && OrderedAsync.DUMMY.getContent().equals(request.getMessage().getContent()); | ||
| } | ||
|
Comment on lines
+1147
to
+1149
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We cannot use DUMMY since user applications can set any message. They may already have used it in their messages. We probably need to add a flag to the proto: +++ b/ratis-proto/src/main/proto/Raft.proto
@@ -302,6 +302,7 @@ message ForwardRequestTypeProto {
message ReadRequestTypeProto {
bool preferNonLinearizable = 1;
bool readAfterWriteConsistent = 2;
+ bool dummy = 3;
} |
||
|
|
||
| private RaftClientReply readException2Reply(RaftClientRequest request, Throwable e) { | ||
| e = JavaUtils.unwrapCompletionException(e); | ||
| if (e instanceof StateMachineException ) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since server returns a DUMMY_SUCCESS_REPLY, we could build the failed reply directly: