syntax = "proto3"; package raft_serverpb; import "eraftpb.proto"; import "metapb.proto"; import "kvrpcpb.proto"; import "disk_usage.proto"; import "encryptionpb.proto"; import "rustproto.proto"; option (rustproto.lite_runtime_all) = true; option java_package = "org.tikv.kvproto"; message RaftMessage { uint64 region_id = 1; metapb.Peer from_peer = 2; metapb.Peer to_peer = 3; eraftpb.Message message = 4; metapb.RegionEpoch region_epoch = 5; // true means to_peer is a tombstone peer and it should remove itself. bool is_tombstone = 6; // Region key range [start_key, end_key). bytes start_key = 7; bytes end_key = 8; // If it has value, to_peer should be removed if merge is never going to complete. metapb.Region merge_target = 9; ExtraMessage extra_msg = 10; bytes extra_ctx = 11; disk_usage.DiskUsage disk_usage = 12; } message RaftTruncatedState { uint64 index = 1; uint64 term = 2; } message SnapshotCFFile { string cf = 1; uint64 size = 2; uint32 checksum = 3; } message SnapshotMeta { repeated SnapshotCFFile cf_files = 1; // true means this snapshot is triggered for load balance bool for_balance = 2; // true means this is an empty snapshot for witness bool for_witness = 3; // the timestamp second to generate snapshot uint64 start = 4; // the duration of generating snapshot uint64 generate_duration_sec = 5; // the path of the tablet snapshot, it should only be used for v1 to receive // snapshot from v2 string tablet_snap_path = 6; // A hint of the latest commit index on leader when sending snapshot. // It should only be used for v2 to send snapshot to v1. // See https://github.com/pingcap/tiflash/issues/7568 uint64 commit_index_hint = 7; } message SnapshotChunk { RaftMessage message = 1; bytes data = 2; } message Done {} message TabletSnapshotFileMeta { uint64 file_size = 1; string file_name = 2; // Some block data. Unencrypted. bytes head_chunk = 3; // trailing data including checksum. Unencrypted. bytes trailing_chunk = 4; } // Snapshot preview for server to decide whether skip some files. // Server should send back an `AcceptedSnapshotFile` to let client // keep sending specified files. Only SST files can be skipped, all // other files should always be sent. message TabletSnapshotPreview { repeated TabletSnapshotFileMeta metas = 1; // There may be too many metas, use a flag to indicate all metas // are sent. bool end = 2; } message TabletSnapshotFileChunk { uint64 file_size = 1; string file_name = 2; // Encrypted. bytes data = 3; // Initial vector if encryption is enabled. bytes iv = 4; encryptionpb.DataKey key = 5; } message TabletSnapshotHead { RaftMessage message = 1; bool use_cache = 2; } message TabletSnapshotEnd { // Checksum of all data sent in `TabletSnapshotFileChunk.data` and // `TabletSnapshotFileChunk.file_name`. 
message KeyValue {
    bytes key = 1;
    bytes value = 2;
}

message RaftSnapshotData {
    metapb.Region region = 1;
    uint64 file_size = 2;
    repeated KeyValue data = 3;
    uint64 version = 4;
    SnapshotMeta meta = 5;
    repeated metapb.Peer removed_records = 6;
    repeated MergedRecord merged_records = 7;
}

message StoreIdent {
    uint64 cluster_id = 1;
    uint64 store_id = 2;
    kvrpcpb.APIVersion api_version = 3;
}

message StoreRecoverState {
    // Used for recovery on TiKV start when the WAL of KVDB is disabled.
    // TiKV may read all relations between seqno and raft log index, and
    // replay all raft logs whose corresponding seqno is smaller than the
    // seqno here. After TiKV has replayed all raft logs and flushed the KV
    // data, the seqno here must be updated.
    uint64 seqno = 1;
}

message RaftLocalState {
    eraftpb.HardState hard_state = 1;
    uint64 last_index = 2;
}

message RaftApplyState {
    uint64 applied_index = 1;
    uint64 last_commit_index = 3;
    uint64 commit_index = 4;
    uint64 commit_term = 5;
    RaftTruncatedState truncated_state = 2;
}

enum PeerState {
    Normal = 0;
    Applying = 1;
    Tombstone = 2;
    Merging = 3;
    // Currently used for witness to non-witness conversion: when a witness
    // has just become a non-witness, we need to set and persist this state,
    // so that if the service restarts before applying the snapshot, we can
    // actively request a snapshot when initializing this peer.
    Unavailable = 4;
}

message MergeState {
    uint64 min_index = 1;
    metapb.Region target = 2;
    uint64 commit = 3;
}

message MergedRecord {
    uint64 source_region_id = 1;
    metapb.RegionEpoch source_epoch = 2;
    // Peers of the source region when the merge is committed.
    repeated metapb.Peer source_peers = 3;
    // Removed peers (by conf change) of the source region when the merge is
    // committed.
    repeated metapb.Peer source_removed_records = 9;
    uint64 target_region_id = 4;
    metapb.RegionEpoch target_epoch = 5;
    repeated metapb.Peer target_peers = 6;
    // Commit merge index.
    uint64 index = 7;
    // Prepare merge index.
    uint64 source_index = 8;
}

message RegionLocalState {
    PeerState state = 1;
    metapb.Region region = 2;
    MergeState merge_state = 3;
    // The apply index corresponding to the storage when it's initialized.
    uint64 tablet_index = 4;
    // Raft doesn't guarantee that a peer will be removed in the end. In v1, a
    // peer finds out its fate by logs or broadcast; in v2, the leader is
    // responsible for ensuring that removed peers are destroyed.
    // Note: only peers that have been part of this region can be in this list.
    repeated metapb.Peer removed_records = 5;
    // A merged peer can't be deleted like GC peers. Instead, the leader needs
    // to query the target peer to decide whether the source peer can be
    // destroyed.
    repeated MergedRecord merged_records = 6;
}

message RegionSequenceNumberRelation {
    uint64 region_id = 1;
    uint64 sequence_number = 2;
    RaftApplyState apply_state = 3;
    RegionLocalState region_state = 4;
}
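// A minimal sketch of the recovery flow described on StoreRecoverState,
// assuming relations like the one above are persisted per region; pseudocode
// in comments only, not part of the wire format:
//
//   for rel in all persisted RegionSequenceNumberRelation records:
//       if rel.sequence_number < StoreRecoverState.seqno:
//           replay the raft logs of rel.region_id up to
//           rel.apply_state.applied_index
//   after all logs are replayed and the KV data is flushed, persist the
//   updated seqno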
message AvailabilityContext {
    uint64 from_region_id = 1;
    metapb.RegionEpoch from_region_epoch = 2;
    bool unavailable = 3;
    bool trimmed = 4;
}

enum ExtraMessageType {
    MsgRegionWakeUp = 0;
    MsgWantRollbackMerge = 1;
    MsgCheckStalePeer = 2;
    MsgCheckStalePeerResponse = 3;
    // If the leader is going to sleep, it will send requests to all its
    // followers to make sure they all agree to sleep.
    MsgHibernateRequest = 4;
    MsgHibernateResponse = 5;
    MsgRejectRaftLogCausedByMemoryUsage = 6;
    MsgAvailabilityRequest = 7;
    MsgAvailabilityResponse = 8;
    MsgVoterReplicatedIndexRequest = 9;
    MsgVoterReplicatedIndexResponse = 10;
    // The message means that `from` is a tombstone. The leader can then
    // update removed_records.
    MsgGcPeerRequest = 11;
    MsgGcPeerResponse = 12;
    MsgFlushMemtable = 13;
    MsgRefreshBuckets = 14;
}

message FlushMemtable {
    uint64 region_id = 1;
}

message RefreshBuckets {
    uint64 version = 1;
    repeated bytes keys = 2;
    repeated uint64 sizes = 3;
}

message CheckGcPeer {
    // The ID of the region that triggers the check and waits for the report.
    // It should be the ID of the region that `RaftMessage.from_peer` belongs
    // to.
    uint64 from_region_id = 1;
    // The ID of the region to be checked for whether it should be destroyed.
    uint64 check_region_id = 2;
    // The epoch of the region to be checked.
    metapb.RegionEpoch check_region_epoch = 3;
    // The peer to be checked.
    metapb.Peer check_peer = 4;
}

message ExtraMessage {
    ExtraMessageType type = 1;
    // A merge-related index. In `MsgWantRollbackMerge`, it's the prepare
    // merge index; in `MsgGcPeerRequest`, it's the commit merge index; in
    // `MsgVoterReplicatedIndexRequest`, it's the voter_replicated_index.
    uint64 index = 2;
    // In `MsgCheckStalePeerResponse`, it's the peers that the receiver can
    // continue to query.
    repeated metapb.Peer check_peers = 3;
    bool wait_data = 4;
    // If true, forcibly wake up hibernated regions.
    bool forcely_awaken = 5;
    CheckGcPeer check_gc_peer = 6;
    FlushMemtable flush_memtable = 7;
    // Used by `MsgAvailabilityRequest` and `MsgAvailabilityResponse` in v2.
    AvailabilityContext availability_context = 8;
    // Notifies the peer to refresh its buckets version.
    RefreshBuckets refresh_buckets = 9;
}
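// An illustrative ExtraMessage in protobuf text format (all values are
// hypothetical, for documentation only):
//
//   type: MsgRefreshBuckets
//   refresh_buckets {
//     version: 5
//     keys: "a"
//     keys: "m"
//     sizes: 1024
//     sizes: 2048
//   }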