membuffer: support staging & checkpoint for ART (#1465)

ref pingcap/tidb#55287

Signed-off-by: you06 <you1474600@gmail.com>
This commit is contained in:
you06 2024-09-23 11:55:13 +08:00 committed by GitHub
parent 6beede6998
commit 271945f345
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 279 additions and 38 deletions

View File

@ -259,7 +259,7 @@ type KeyFlagsGetter interface {
// VlogMemDB is the interface of the memory buffer which supports vlog to revert node and inspect node. // VlogMemDB is the interface of the memory buffer which supports vlog to revert node and inspect node.
type VlogMemDB[G KeyFlagsGetter] interface { type VlogMemDB[G KeyFlagsGetter] interface {
RevertNode(hdr *MemdbVlogHdr) RevertVAddr(hdr *MemdbVlogHdr)
InspectNode(addr MemdbArenaAddr) (G, MemdbArenaAddr) InspectNode(addr MemdbArenaAddr) (G, MemdbArenaAddr)
} }
@ -351,7 +351,7 @@ func (l *MemdbVlog[G, M]) RevertToCheckpoint(m M, cp *MemDBCheckpoint) {
block := l.blocks[cursor.blocks-1].buf block := l.blocks[cursor.blocks-1].buf
var hdr MemdbVlogHdr var hdr MemdbVlogHdr
hdr.load(block[hdrOff:]) hdr.load(block[hdrOff:])
m.RevertNode(&hdr) m.RevertVAddr(&hdr)
l.moveBackCursor(&cursor, &hdr) l.moveBackCursor(&cursor, &hdr)
} }
} }

View File

@ -42,7 +42,7 @@ import (
type dummyMemDB struct{} type dummyMemDB struct{}
func (m *dummyMemDB) RevertNode(hdr *MemdbVlogHdr) {} func (m *dummyMemDB) RevertVAddr(hdr *MemdbVlogHdr) {}
func (m *dummyMemDB) InspectNode(addr MemdbArenaAddr) (KeyFlagsGetter, MemdbArenaAddr) { func (m *dummyMemDB) InspectNode(addr MemdbArenaAddr) (KeyFlagsGetter, MemdbArenaAddr) {
return nil, NullAddr return nil, NullAddr
} }

View File

@ -16,6 +16,7 @@
package art package art
import ( import (
"fmt"
"math" "math"
tikverr "github.com/tikv/client-go/v2/error" tikverr "github.com/tikv/client-go/v2/error"
@ -76,7 +77,7 @@ func (t *ART) GetFlags(key []byte) (kv.KeyFlags, error) {
if leaf.vAddr.IsNull() && leaf.isDeleted() { if leaf.vAddr.IsNull() && leaf.isDeleted() {
return 0, tikverr.ErrNotExist return 0, tikverr.ErrNotExist
} }
return leaf.getKeyFlags(), nil return leaf.GetKeyFlags(), nil
} }
func (t *ART) Set(key artKey, value []byte, ops ...kv.FlagsOp) error { func (t *ART) Set(key artKey, value []byte, ops ...kv.FlagsOp) error {
@ -324,8 +325,8 @@ func (t *ART) newLeaf(key artKey) (artNode, *artLeaf) {
} }
func (t *ART) setValue(addr arena.MemdbArenaAddr, l *artLeaf, value []byte, ops []kv.FlagsOp) { func (t *ART) setValue(addr arena.MemdbArenaAddr, l *artLeaf, value []byte, ops []kv.FlagsOp) {
flags := l.getKeyFlags() flags := l.GetKeyFlags()
if flags == 0 && l.vAddr.IsNull() { if flags == 0 && l.vAddr.IsNull() || l.isDeleted() {
t.len++ t.len++
t.size += int(l.klen) t.size += int(l.klen)
} }
@ -373,12 +374,12 @@ func (t *ART) trySwapValue(addr arena.MemdbArenaAddr, value []byte) (int, bool)
} }
func (t *ART) Dirty() bool { func (t *ART) Dirty() bool {
panic("unimplemented") return t.dirty
} }
// Mem returns the memory usage of MemBuffer. // Mem returns the memory usage of MemBuffer.
func (t *ART) Mem() uint64 { func (t *ART) Mem() uint64 {
panic("unimplemented") return t.allocator.nodeAllocator.Capacity() + t.allocator.vlogAllocator.Capacity()
} }
// Len returns the count of entries in the MemBuffer. // Len returns the count of entries in the MemBuffer.
@ -392,51 +393,97 @@ func (t *ART) Size() int {
} }
func (t *ART) checkpoint() arena.MemDBCheckpoint { func (t *ART) checkpoint() arena.MemDBCheckpoint {
panic("unimplemented") return t.allocator.vlogAllocator.Checkpoint()
} }
func (t *ART) RevertNode(hdr *arena.MemdbVlogHdr) { func (t *ART) RevertVAddr(hdr *arena.MemdbVlogHdr) {
panic("unimplemented") lf := t.allocator.getLeaf(hdr.NodeAddr)
if lf == nil {
panic("revert an invalid node")
}
lf.vAddr = hdr.OldValue
t.size -= int(hdr.ValueLen)
if hdr.OldValue.IsNull() {
keptFlags := lf.GetKeyFlags()
keptFlags = keptFlags.AndPersistent()
if keptFlags == 0 {
lf.markDelete()
t.len--
t.size -= int(lf.klen)
} else {
lf.setKeyFlags(keptFlags)
}
} else {
t.size += len(t.allocator.vlogAllocator.GetValue(hdr.OldValue))
}
} }
func (t *ART) InspectNode(addr arena.MemdbArenaAddr) (*artLeaf, arena.MemdbArenaAddr) { func (t *ART) InspectNode(addr arena.MemdbArenaAddr) (*artLeaf, arena.MemdbArenaAddr) {
panic("unimplemented") lf := t.allocator.getLeaf(addr)
return lf, lf.vAddr
} }
// Checkpoint returns a checkpoint of ART. // Checkpoint returns a checkpoint of ART.
func (t *ART) Checkpoint() *arena.MemDBCheckpoint { func (t *ART) Checkpoint() *arena.MemDBCheckpoint {
panic("unimplemented") cp := t.allocator.vlogAllocator.Checkpoint()
return &cp
} }
// RevertToCheckpoint reverts the ART to the checkpoint. // RevertToCheckpoint reverts the ART to the checkpoint.
func (t *ART) RevertToCheckpoint(cp *arena.MemDBCheckpoint) { func (t *ART) RevertToCheckpoint(cp *arena.MemDBCheckpoint) {
panic("unimplemented") t.allocator.vlogAllocator.RevertToCheckpoint(t, cp)
t.allocator.vlogAllocator.Truncate(cp)
t.allocator.vlogAllocator.OnMemChange()
} }
func (t *ART) Stages() []arena.MemDBCheckpoint { func (t *ART) Stages() []arena.MemDBCheckpoint {
panic("unimplemented") return t.stages
} }
func (t *ART) Staging() int { func (t *ART) Staging() int {
return 0 t.stages = append(t.stages, t.checkpoint())
return len(t.stages)
} }
func (t *ART) Release(h int) { func (t *ART) Release(h int) {
if h == 0 {
// 0 is the invalid and no-effect handle.
return
}
if h != len(t.stages) {
panic("cannot release staging buffer")
}
if h == 1 {
tail := t.checkpoint()
if !t.stages[0].IsSamePosition(&tail) {
t.dirty = true
}
}
t.stages = t.stages[:h-1]
} }
func (t *ART) Cleanup(h int) { func (t *ART) Cleanup(h int) {
} if h == 0 {
// 0 is the invalid and no-effect handle.
return
}
if h > len(t.stages) {
return
}
if h < len(t.stages) {
panic(fmt.Sprintf("cannot cleanup staging buffer, h=%v, len(db.stages)=%v", h, len(t.stages)))
}
func (t *ART) revertToCheckpoint(cp *arena.MemDBCheckpoint) { cp := &t.stages[h-1]
panic("unimplemented") if !t.vlogInvalid {
} curr := t.checkpoint()
if !curr.IsSamePosition(cp) {
func (t *ART) moveBackCursor(cursor *arena.MemDBCheckpoint, hdr *arena.MemdbVlogHdr) { t.allocator.vlogAllocator.RevertToCheckpoint(t, cp)
panic("unimplemented") t.allocator.vlogAllocator.Truncate(cp)
} }
}
func (t *ART) truncate(snap *arena.MemDBCheckpoint) { t.stages = t.stages[:h-1]
panic("unimplemented") t.allocator.vlogAllocator.OnMemChange()
} }
// Reset resets the MemBuffer to initial states. // Reset resets the MemBuffer to initial states.
@ -459,7 +506,10 @@ func (t *ART) DiscardValues() {
// InspectStage used to inspect the value updates in the given stage. // InspectStage used to inspect the value updates in the given stage.
func (t *ART) InspectStage(handle int, f func([]byte, kv.KeyFlags, []byte)) { func (t *ART) InspectStage(handle int, f func([]byte, kv.KeyFlags, []byte)) {
panic("unimplemented") idx := handle - 1
tail := t.allocator.vlogAllocator.Checkpoint()
head := t.stages[idx]
t.allocator.vlogAllocator.InspectKVInLog(t, &head, &tail, f)
} }
// SelectValueHistory select the latest value which makes `predicate` returns true from the modification history. // SelectValueHistory select the latest value which makes `predicate` returns true from the modification history.

View File

@ -265,11 +265,6 @@ func (l *artLeaf) getKeyDepth(depth uint32) []byte {
return unsafe.Slice((*byte)(base), int(l.klen)-int(depth)) return unsafe.Slice((*byte)(base), int(l.klen)-int(depth))
} }
// GetKeyFlags gets the flags of the leaf
func (l *artLeaf) GetKeyFlags() kv.KeyFlags {
panic("unimplemented")
}
func (l *artLeaf) match(depth uint32, key artKey) bool { func (l *artLeaf) match(depth uint32, key artKey) bool {
return bytes.Equal(l.getKeyDepth(depth), key[depth:]) return bytes.Equal(l.getKeyDepth(depth), key[depth:])
} }
@ -278,7 +273,8 @@ func (l *artLeaf) setKeyFlags(flags kv.KeyFlags) {
l.flags = uint16(flags) & flagMask l.flags = uint16(flags) & flagMask
} }
func (l *artLeaf) getKeyFlags() kv.KeyFlags { // GetKeyFlags gets the flags of the leaf
func (l *artLeaf) GetKeyFlags() kv.KeyFlags {
return kv.KeyFlags(l.flags & flagMask) return kv.KeyFlags(l.flags & flagMask)
} }
@ -288,8 +284,6 @@ const (
) )
// markDelete marks the artLeaf as deleted // markDelete marks the artLeaf as deleted
//
//nolint:unused
func (l *artLeaf) markDelete() { func (l *artLeaf) markDelete() {
l.flags = deleteFlag l.flags = deleteFlag
} }

View File

@ -38,6 +38,7 @@
package unionstore package unionstore
import ( import (
"context"
rand2 "crypto/rand" rand2 "crypto/rand"
"encoding/binary" "encoding/binary"
"math/rand" "math/rand"
@ -166,3 +167,41 @@ func testRandomDeriveRecur(t *testing.T, db *MemDB, golden *leveldb.DB, depth in
return opLog return opLog
} }
func TestRandomAB(t *testing.T) {
testRandomAB(t, newRbtDBWithContext(), newArtDBWithContext())
}
func testRandomAB(t *testing.T, bufferA, bufferB MemBuffer) {
require := require.New(t)
const cnt = 50000
keys := make([][]byte, cnt)
for i := 0; i < cnt; i++ {
h := bufferA.Staging()
require.Equal(h, bufferB.Staging())
keys[i] = make([]byte, rand.Intn(19)+1)
rand2.Read(keys[i])
bufferA.Set(keys[i], keys[i])
bufferB.Set(keys[i], keys[i])
if i%2 == 0 {
bufferA.Cleanup(h)
bufferB.Cleanup(h)
} else {
bufferA.Release(h)
bufferB.Release(h)
}
require.Equal(bufferA.Dirty(), bufferB.Dirty())
require.Equal(bufferA.Len(), bufferB.Len())
require.Equal(bufferA.Size(), bufferB.Size(), i)
key := keys[rand.Intn(i+1)]
v1, err1 := bufferA.Get(context.Background(), key)
v2, err2 := bufferB.Get(context.Background(), key)
require.Equal(err1, err2)
require.Equal(v1, v2)
}
}

View File

@ -392,6 +392,7 @@ func testReset(t *testing.T, db interface {
func TestInspectStage(t *testing.T) { func TestInspectStage(t *testing.T) {
testInspectStage(t, newRbtDBWithContext()) testInspectStage(t, newRbtDBWithContext())
testInspectStage(t, newArtDBWithContext())
} }
func testInspectStage(t *testing.T, db MemBuffer) { func testInspectStage(t *testing.T, db MemBuffer) {
@ -449,6 +450,7 @@ func testInspectStage(t *testing.T, db MemBuffer) {
func TestDirty(t *testing.T) { func TestDirty(t *testing.T) {
testDirty(t, func() MemBuffer { return newRbtDBWithContext() }) testDirty(t, func() MemBuffer { return newRbtDBWithContext() })
testDirty(t, func() MemBuffer { return newArtDBWithContext() })
} }
func testDirty(t *testing.T, createDb func() MemBuffer) { func testDirty(t *testing.T, createDb func() MemBuffer) {
@ -782,8 +784,12 @@ func TestNewIteratorMin(t *testing.T) {
} }
func TestMemDBStaging(t *testing.T) { func TestMemDBStaging(t *testing.T) {
testMemDBStaging(t, newRbtDBWithContext())
testMemDBStaging(t, newArtDBWithContext())
}
func testMemDBStaging(t *testing.T, buffer MemBuffer) {
assert := assert.New(t) assert := assert.New(t)
buffer := NewMemDB()
err := buffer.Set([]byte("x"), make([]byte, 2)) err := buffer.Set([]byte("x"), make([]byte, 2))
assert.Nil(err) assert.Nil(err)
@ -809,6 +815,117 @@ func TestMemDBStaging(t *testing.T) {
assert.Equal(len(v), 2) assert.Equal(len(v), 2)
} }
func TestMemDBMultiLevelStaging(t *testing.T) {
testMemDBMultiLevelStaging(t, newRbtDBWithContext())
testMemDBMultiLevelStaging(t, newArtDBWithContext())
}
func testMemDBMultiLevelStaging(t *testing.T, buffer MemBuffer) {
assert := assert.New(t)
key := []byte{0}
for i := 0; i < 100; i++ {
assert.Equal(i+1, buffer.Staging())
buffer.Set(key, []byte{byte(i)})
v, err := buffer.Get(context.Background(), key)
assert.Nil(err)
assert.Equal(v, []byte{byte(i)})
}
for i := 99; i >= 0; i-- {
expect := i
if i%2 == 1 {
expect = i - 1
buffer.Cleanup(i + 1)
} else {
buffer.Release(i + 1)
}
v, err := buffer.Get(context.Background(), key)
assert.Nil(err)
assert.Equal(v, []byte{byte(expect)})
}
}
func TestInvalidStagingHandle(t *testing.T) {
testInvalidStagingHandle(t, newRbtDBWithContext())
testInvalidStagingHandle(t, newArtDBWithContext())
}
func testInvalidStagingHandle(t *testing.T, buffer MemBuffer) {
// handle == 0 takes no effect
// MemBuffer.Release only accept the latest handle
// MemBuffer.Cleanup accept handle large or equal than the latest handle, but only takes effect when handle is the latest handle.
assert := assert.New(t)
// test MemBuffer.Release
h1 := buffer.Staging()
assert.Positive(h1)
h2 := buffer.Staging()
assert.Positive(h2)
assert.Panics(func() {
buffer.Release(h2 + 1)
})
assert.Panics(func() {
buffer.Release(h2 - 1)
})
buffer.Release(0)
buffer.Release(h2)
buffer.Release(0)
buffer.Release(h1)
buffer.Release(0)
// test MemBuffer.Cleanup
h1 = buffer.Staging()
assert.Positive(h1)
h2 = buffer.Staging()
assert.Positive(h2)
buffer.Cleanup(h2 + 1) // Cleanup is ok even if the handle is greater than the existing handles.
assert.Panics(func() {
buffer.Cleanup(h2 - 1)
})
buffer.Cleanup(0)
buffer.Cleanup(h2)
buffer.Cleanup(0)
buffer.Cleanup(h1)
buffer.Cleanup(0)
}
func TestMemDBCheckpoint(t *testing.T) {
testMemDBCheckpoint(t, newRbtDBWithContext())
testMemDBCheckpoint(t, newArtDBWithContext())
}
func testMemDBCheckpoint(t *testing.T, buffer MemBuffer) {
assert := assert.New(t)
cp1 := buffer.Checkpoint()
buffer.Set([]byte("x"), []byte("x"))
cp2 := buffer.Checkpoint()
buffer.Set([]byte("y"), []byte("y"))
h := buffer.Staging()
buffer.Set([]byte("z"), []byte("z"))
buffer.Release(h)
for _, k := range []string{"x", "y", "z"} {
v, _ := buffer.Get(context.Background(), []byte(k))
assert.Equal(v, []byte(k))
}
buffer.RevertToCheckpoint(cp2)
v, _ := buffer.Get(context.Background(), []byte("x"))
assert.Equal(v, []byte("x"))
for _, k := range []string{"y", "z"} {
_, err := buffer.Get(context.Background(), []byte(k))
assert.NotNil(err)
}
buffer.RevertToCheckpoint(cp1)
_, err := buffer.Get(context.Background(), []byte("x"))
assert.NotNil(err)
}
func TestBufferLimit(t *testing.T) { func TestBufferLimit(t *testing.T) {
testBufferLimit(t, newRbtDBWithContext()) testBufferLimit(t, newRbtDBWithContext())
} }
@ -897,3 +1014,36 @@ func testSnapshotGetIter(t *testing.T, db MemBuffer) {
assert.Equal(reverseIter.Value(), []byte{byte(50)}) assert.Equal(reverseIter.Value(), []byte{byte(50)})
} }
} }
func TestCleanupKeepPersistentFlag(t *testing.T) {
testCleanupKeepPersistentFlag(t, newRbtDBWithContext())
testCleanupKeepPersistentFlag(t, newArtDBWithContext())
}
func testCleanupKeepPersistentFlag(t *testing.T, db MemBuffer) {
assert := assert.New(t)
persistentFlag := kv.SetKeyLocked
nonPersistentFlag := kv.SetPresumeKeyNotExists
h := db.Staging()
db.SetWithFlags([]byte{1}, []byte{1}, persistentFlag)
db.SetWithFlags([]byte{2}, []byte{2}, nonPersistentFlag)
db.SetWithFlags([]byte{3}, []byte{3}, persistentFlag, nonPersistentFlag)
db.Cleanup(h)
for _, key := range [][]byte{{1}, {2}, {3}} {
// the values are reverted by MemBuffer.Cleanup
_, err := db.Get(context.Background(), key)
assert.NotNil(err)
}
flag, err := db.GetFlags([]byte{1})
assert.Nil(err)
assert.True(flag.HasLocked())
_, err = db.GetFlags([]byte{2})
assert.NotNil(err)
flag, err = db.GetFlags([]byte{3})
assert.Nil(err)
assert.True(flag.HasLocked())
assert.False(flag.HasPresumeKeyNotExists())
}

View File

@ -111,7 +111,7 @@ func (db *RBT) checkKeyInCache(key []byte) (MemdbNodeAddr, bool) {
return nullNodeAddr, false return nullNodeAddr, false
} }
func (db *RBT) RevertNode(hdr *arena.MemdbVlogHdr) { func (db *RBT) RevertVAddr(hdr *arena.MemdbVlogHdr) {
node := db.getNode(hdr.NodeAddr) node := db.getNode(hdr.NodeAddr)
node.vptr = hdr.OldValue node.vptr = hdr.OldValue
db.size -= int(hdr.ValueLen) db.size -= int(hdr.ValueLen)
@ -150,6 +150,10 @@ func (db *RBT) Staging() int {
// Release publish all modifications in the latest staging buffer to upper level. // Release publish all modifications in the latest staging buffer to upper level.
func (db *RBT) Release(h int) { func (db *RBT) Release(h int) {
if h == 0 {
// 0 is the invalid and no-effect handle.
return
}
if h != len(db.stages) { if h != len(db.stages) {
// This should never happens in production environment. // This should never happens in production environment.
// Use panic to make debug easier. // Use panic to make debug easier.
@ -168,6 +172,10 @@ func (db *RBT) Release(h int) {
// Cleanup cleanup the resources referenced by the StagingHandle. // Cleanup cleanup the resources referenced by the StagingHandle.
// If the changes are not published by `Release`, they will be discarded. // If the changes are not published by `Release`, they will be discarded.
func (db *RBT) Cleanup(h int) { func (db *RBT) Cleanup(h int) {
if h == 0 {
// 0 is the invalid and no-effect handle.
return
}
if h > len(db.stages) { if h > len(db.stages) {
return return
} }