// Source: mirror of https://github.com/tikv/client-rust.git
// (Listing metadata at capture time: 188 lines, 6.4 KiB, Protocol Buffer.)
// This schema is written in proto3 syntax.
syntax = "proto3";

// Every enum and message in this file is declared in the `eraftpb` package.
package eraftpb;

// Discriminates the kind of change an `Entry` carries; it is the type of
// `Entry.entry_type`, which determines how `Entry.data` and `Entry.context`
// are used.
enum EntryType {
  // A normal entry: `Entry.data` holds the data change to apply. This is the
  // zero value, so it is also what an unset `entry_type` reads as.
  EntryNormal = 0;
  // A configuration change: `Entry.data` holds a `ConfChange` message.
  EntryConfChange = 1;
  // NOTE(review): presumably the counterpart of `EntryConfChange` whose data
  // holds a `ConfChangeV2` message -- confirm against the consumer.
  EntryConfChangeV2 = 2;
}

// The entry is a type of change that needs to be applied. It contains two
// data fields. While the fields are built into the model, their usage is
// determined by the entry_type.
//
// For normal entries, the data field should contain the data change that
// should be applied. The context field can be used for any contextual data
// that might be relevant to the application of the data.
//
// For configuration changes, the data will contain the ConfChange message and
// the context will provide anything needed to assist the configuration change.
// The context is for the user to set and use in this case.
//
// Field numbers are not in declaration order (`context` is 6, `sync_log` is
// 5); they identify the fields on the wire and must not be renumbered.
message Entry {
  // Selects how `data` and `context` are interpreted.
  EntryType entry_type = 1;
  // NOTE(review): presumably the Raft term of this entry -- confirm.
  uint64 term = 2;
  // NOTE(review): presumably the position of this entry in the log -- confirm.
  uint64 index = 3;
  // Payload: the data change for normal entries, or the serialized
  // configuration-change message for configuration changes.
  bytes data = 4;
  // Contextual data accompanying `data`; set and used by the user for
  // configuration changes.
  bytes context = 6;

  // Deprecated! It is kept for backward compatibility.
  // TODO: remove it in the next major release.
  bool sync_log = 5;
}

// Metadata describing a `Snapshot`: the configuration state together with the
// applied index and its term. Carried in `Snapshot.metadata`.
message SnapshotMetadata {
  // The current `ConfState`.
  ConfState conf_state = 1;
  // The applied index.
  uint64 index = 2;
  // The term of the applied index.
  uint64 term = 3;
}

// A snapshot: an opaque byte payload plus the metadata that describes it.
// Carried in `Message.snapshot`.
message Snapshot {
  // Snapshot payload. The schema treats it as raw bytes; its encoding is
  // not specified here.
  bytes data = 1;
  // Configuration state, applied index and term for this snapshot.
  SnapshotMetadata metadata = 2;
}

// The kind of a `Message`; it is the type of `Message.msg_type`.
//
// `MsgHup` is the zero value, so it is also what an unset `msg_type` reads
// as. Values are numbered contiguously from 0 to 18; the numbers are part of
// the wire contract and must not be changed or reused.
//
// NOTE(review): the precise semantics of each value are defined by the Raft
// implementation that consumes this schema and are not visible in this file.
// Judging by the names only, several values form request/response pairs
// (e.g. `MsgAppend`/`MsgAppendResponse`) -- confirm against the consumer.
enum MessageType {
  MsgHup = 0;
  MsgBeat = 1;
  MsgPropose = 2;
  MsgAppend = 3;
  MsgAppendResponse = 4;
  MsgRequestVote = 5;
  MsgRequestVoteResponse = 6;
  MsgSnapshot = 7;
  MsgHeartbeat = 8;
  MsgHeartbeatResponse = 9;
  MsgUnreachable = 10;
  MsgSnapStatus = 11;
  MsgCheckQuorum = 12;
  MsgTransferLeader = 13;
  MsgTimeoutNow = 14;
  MsgReadIndex = 15;
  MsgReadIndexResp = 16;
  MsgRequestPreVote = 17;
  MsgRequestPreVoteResponse = 18;
}

// A single message whose kind is given by `msg_type`.
//
// Field numbers are not in declaration order (`request_snapshot` is 13 but is
// declared between fields 9 and 10); they identify the fields on the wire and
// must not be renumbered.
//
// NOTE(review): which fields are meaningful for which `MessageType` is
// decided by the consumer of this schema and is not visible in this file.
message Message {
  // The kind of this message.
  MessageType msg_type = 1;
  // NOTE(review): presumably the id of the receiving node -- confirm.
  uint64 to = 2;
  // NOTE(review): presumably the id of the sending node -- confirm.
  uint64 from = 3;
  uint64 term = 4;
  uint64 log_term = 5;
  uint64 index = 6;
  // Zero or more log entries carried by this message.
  repeated Entry entries = 7;
  uint64 commit = 8;
  // Snapshot payload and metadata carried by this message, if any.
  Snapshot snapshot = 9;
  uint64 request_snapshot = 13;
  bool reject = 10;
  uint64 reject_hint = 11;
  // Opaque bytes attached to the message.
  bytes context = 12;
  // The old priority field, superseded by `priority` below. Note that it is
  // unsigned while the new field is signed.
  uint64 deprecated_priority = 14;
  // If this new field is not set, then use the above old field; otherwise
  // use the new field. When broadcasting request vote, both fields are
  // set if the priority is larger than 0. This change is not a fully
  // compatible change, but it makes minimal impact that only new priority
  // is not recognized by the old nodes during rolling update.
  int64 priority = 15;
}

// Three unsigned counters/ids grouped into one message: `term`, `vote` and
// `commit`.
//
// NOTE(review): presumably the Raft state that must be persisted (current
// term, the node voted for, and the commit index) -- confirm against the
// consumer; this file does not state it.
message HardState {
  uint64 term = 1;
  uint64 vote = 2;
  uint64 commit = 3;
}

// Specifies how (and whether) joint consensus is used for a configuration
// change; it is the type of `ConfChangeV2.transition`.
enum ConfChangeTransition {
  // Automatically use the simple protocol if possible, otherwise fall back
  // to ConfChangeTransition::Implicit. Most applications will want to use
  // this. This is the zero value, so it is also what an unset `transition`
  // reads as.
  Auto = 0;
  // Use joint consensus unconditionally, and transition out of them
  // automatically (by proposing a zero configuration change).
  //
  // This option is suitable for applications that want to minimize the time
  // spent in the joint configuration and do not store the joint configuration
  // in the state machine (outside of InitialState).
  Implicit = 1;
  // Use joint consensus and remain in the joint configuration until the
  // application proposes a no-op configuration change. This is suitable for
  // applications that want to explicitly control the transitions, for example
  // to use a custom payload (via the Context field).
  Explicit = 2;
}

// The membership configuration: the voters and learners, plus the extra
// state that exists while the group is in joint consensus. Used as
// `SnapshotMetadata.conf_state`.
message ConfState {
  // Ids of the voters in the configuration (the incoming one when joint).
  repeated uint64 voters = 1;
  // Ids of the learners in the configuration.
  repeated uint64 learners = 2;

  // The voters in the outgoing config. If not empty the node is in joint
  // consensus.
  repeated uint64 voters_outgoing = 3;
  // The nodes that will become learners when the outgoing config is removed.
  // These nodes are necessarily currently in nodes_joint (or they would have
  // been added to the incoming config right away).
  // NOTE(review): `nodes_joint` does not name a field of this message;
  // presumably `voters_outgoing` is meant -- confirm.
  repeated uint64 learners_next = 4;
  // If set, the config is joint and Raft will automatically transition into
  // the final config (i.e. remove the outgoing config) when this is safe.
  bool auto_leave = 5;
}

// The operation performed by a single configuration change; it is the type of
// `ConfChange.change_type` and `ConfChangeSingle.change_type`.
enum ConfChangeType {
  // Add a node. This is the zero value, so it is also what an unset
  // `change_type` reads as.
  // NOTE(review): presumably the node is added as a voter, in contrast to
  // `AddLearnerNode` -- confirm.
  AddNode = 0;
  // Remove a node.
  RemoveNode = 1;
  // Add a node as a learner.
  AddLearnerNode = 2;
}

// A configuration change that applies one `ConfChangeType` operation to one
// node.
//
// Field numbers are not in declaration order (`id` is 1 but is declared
// last); they identify the fields on the wire and must not be renumbered.
message ConfChange {
  // The operation to perform.
  ConfChangeType change_type = 2;
  // Id of the node the operation applies to.
  uint64 node_id = 3;
  // Opaque bytes attached to the change.
  bytes context = 4;

  // NOTE(review): presumably an identifier for this change itself, as
  // opposed to `node_id` -- confirm against the consumer.
  uint64 id = 1;
}

// ConfChangeSingle is an individual configuration change operation. Multiple
// such operations can be carried out atomically via a ConfChangeV2.
message ConfChangeSingle {
  // The operation to perform.
  ConfChangeType change_type = 1;
  // Id of the node the operation applies to.
  uint64 node_id = 2;
}

// ConfChangeV2 messages initiate configuration changes. They support both the
// simple "one at a time" membership change protocol and full Joint Consensus
// allowing for arbitrary changes in membership.
//
// The supplied context is treated as an opaque payload and can be used to
// attach an action on the state machine to the application of the config change
// proposal. Note that contrary to Joint Consensus as outlined in the Raft
// paper[1], configuration changes become active when they are *applied* to the
// state machine (not when they are appended to the log).
//
// The simple protocol can be used whenever only a single change is made.
//
// Non-simple changes require the use of Joint Consensus, for which two
// configuration changes are run. The first configuration change specifies the
// desired changes and transitions the Raft group into the joint configuration,
// in which quorum requires a majority of both the pre-changes and post-changes
// configuration. Joint Consensus avoids entering fragile intermediate
// configurations that could compromise survivability. For example, without the
// use of Joint Consensus and running across three availability zones with a
// replication factor of three, it is not possible to replace a voter without
// entering an intermediate configuration that does not survive the outage of
// one availability zone.
//
// The provided ConfChangeTransition specifies how (and whether) Joint Consensus
// is used, and assigns the task of leaving the joint configuration either to
// Raft or the application. Leaving the joint configuration is accomplished by
// proposing a ConfChangeV2 with only and optionally the Context field
// populated.
//
// For details on Raft membership changes, see:
//
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
message ConfChangeV2 {
  // How (and whether) Joint Consensus is used for this change.
  ConfChangeTransition transition = 1;
  // The individual operations to carry out atomically. Per the description
  // above, this is left empty when leaving the joint configuration.
  repeated ConfChangeSingle changes = 2;
  // Opaque payload attached to the change.
  bytes context = 3;
}