mirror of https://github.com/etcd-io/etcd.git
163 lines
4.1 KiB
Go
163 lines
4.1 KiB
Go
// Copyright 2025 The etcd Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package cache
|
|
|
|
import (
|
|
"fmt"
|
|
"sync"
|
|
|
|
"github.com/google/btree"
|
|
|
|
"go.etcd.io/etcd/api/v3/mvccpb"
|
|
"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
|
|
clientv3 "go.etcd.io/etcd/client/v3"
|
|
)
|
|
|
|
// ErrNotReady is returned by getSnapshot (and therefore by Get) while the
// store's latest revision is still 0, the value newStore initializes it to.
var ErrNotReady = fmt.Errorf("cache: store not ready")
|
|
|
|
// The store keeps a bounded history of snapshots using ringBuffer so that
|
|
// reads at historical revisions can be served until they fall out of the window.
|
|
type store struct {
|
|
mu sync.RWMutex
|
|
degree int
|
|
latest snapshot // latest is the mutable working snapshot
|
|
history ringBuffer[*snapshot] // history stores immutable cloned snapshots
|
|
}
|
|
|
|
func newStore(degree int, historyCapacity int) *store {
|
|
tree := btree.New(degree)
|
|
return &store{
|
|
degree: degree,
|
|
latest: snapshot{rev: 0, tree: tree},
|
|
history: *newRingBuffer(historyCapacity, func(s *snapshot) int64 { return s.rev }),
|
|
}
|
|
}
|
|
|
|
type kvItem struct {
|
|
key string
|
|
kv *mvccpb.KeyValue
|
|
}
|
|
|
|
func newKVItem(kv *mvccpb.KeyValue) *kvItem {
|
|
return &kvItem{key: string(kv.Key), kv: kv}
|
|
}
|
|
|
|
func (a *kvItem) Less(b btree.Item) bool {
|
|
return a.key < b.(*kvItem).key
|
|
}
|
|
|
|
func (s *store) Get(startKey, endKey []byte, rev int64) ([]*mvccpb.KeyValue, int64, error) {
|
|
snapshot, latestRev, err := s.getSnapshot(rev)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
return snapshot.Range(startKey, endKey), latestRev, nil
|
|
}
|
|
|
|
func (s *store) getSnapshot(rev int64) (*snapshot, int64, error) {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
if s.latest.rev == 0 {
|
|
return nil, 0, ErrNotReady
|
|
}
|
|
if rev < 0 {
|
|
return nil, 0, fmt.Errorf("invalid revision: %d", rev)
|
|
}
|
|
if rev == 0 {
|
|
rev = s.latest.rev
|
|
}
|
|
if rev > s.latest.rev {
|
|
return nil, 0, rpctypes.ErrFutureRev
|
|
}
|
|
oldestRev := s.history.PeekOldest()
|
|
if rev < oldestRev {
|
|
return nil, 0, rpctypes.ErrCompacted
|
|
}
|
|
|
|
var targetSnapshot *snapshot
|
|
s.history.AscendGreaterOrEqual(rev, func(rev int64, snap *snapshot) bool {
|
|
targetSnapshot = snap
|
|
return false
|
|
})
|
|
|
|
return targetSnapshot, s.latest.rev, nil
|
|
}
|
|
|
|
// Restore replaces state with the bootstrap snapshot and resets history.
|
|
func (s *store) Restore(kvs []*mvccpb.KeyValue, rev int64) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
s.latest.tree = btree.New(s.degree)
|
|
for _, kv := range kvs {
|
|
s.latest.tree.ReplaceOrInsert(newKVItem(kv))
|
|
}
|
|
s.history.RebaseHistory()
|
|
s.latest.rev = rev
|
|
s.history.Append(newClonedSnapshot(rev, s.latest.tree))
|
|
}
|
|
|
|
func (s *store) Apply(events []*clientv3.Event) error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if err := validateRevisions(events, s.latest.rev); err != nil {
|
|
return err
|
|
}
|
|
|
|
for i := 0; i < len(events); {
|
|
rev := events[i].Kv.ModRevision
|
|
|
|
for i < len(events) && events[i].Kv.ModRevision == rev {
|
|
ev := events[i]
|
|
switch ev.Type {
|
|
case clientv3.EventTypeDelete:
|
|
if removed := s.latest.tree.Delete(&kvItem{key: string(ev.Kv.Key)}); removed == nil {
|
|
return fmt.Errorf("cache: delete non-existent key %s", string(ev.Kv.Key))
|
|
}
|
|
case clientv3.EventTypePut:
|
|
s.latest.tree.ReplaceOrInsert(newKVItem(ev.Kv))
|
|
}
|
|
i++
|
|
}
|
|
s.latest.rev = rev
|
|
s.history.Append(newClonedSnapshot(rev, s.latest.tree))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *store) LatestRev() int64 {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
return s.latest.rev
|
|
}
|
|
|
|
func validateRevisions(events []*clientv3.Event, latestRev int64) error {
|
|
if len(events) == 0 {
|
|
return nil
|
|
}
|
|
for _, ev := range events {
|
|
r := ev.Kv.ModRevision
|
|
if r < latestRev {
|
|
return fmt.Errorf("cache: stale event batch (rev %d < latest %d)", r, latestRev)
|
|
}
|
|
if r == latestRev {
|
|
return fmt.Errorf("cache: duplicate revision batch breaks atomic guarantee (rev %d == latest %d)", r, latestRev)
|
|
}
|
|
}
|
|
return nil
|
|
}
|