Merge pull request #46 from Hoverbear/overhaul

Overhaul menus
This commit is contained in:
Ana Hobden 2019-07-18 17:13:03 -07:00 committed by GitHub
commit 6c90ecce9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
61 changed files with 892 additions and 751 deletions

View File

@@ -2,7 +2,13 @@ serve:
	hugo server \
	--buildDrafts \
	--buildFuture \
	--disableFastRender
	--disableFastRender \
	--bind 0.0.0.0
serve-production:
	hugo server \
	--disableFastRender \
	--bind 0.0.0.0
production-build:
	hugo --minify
@@ -12,4 +18,4 @@ preview-build:
	--buildDrafts \
	--buildFuture \
	--baseURL $(DEPLOY_PRIME_URL) \
	--minify
	--minify

View File

@@ -68,19 +68,18 @@ $colors: mergeColorMaps(("twitter-blue": ($twitter-blue, $white)), $colors)
.docs-content
  padding-bottom: 8rem
.deep-dive-toc
  padding-bottom: 2rem
  margin-right: 1rem
.toc
  padding: 1rem 0 3rem 2rem
  li
    font-size: 1.1rem
    &.is-active
      a
        color: $tikv-blue
        font-weight: 700
    a.is-active
      color: $tikv-blue
      font-weight: 700
  ul
    ul
      list-style: square !important;
      ul
        margin-left: 1rem

View File

@@ -1,47 +0,0 @@
title = "TiKV"
baseURL = "https://tikv.org"
languageCode = "en-us"
pygmentsCodeFences = true
pygmentsUseClasses = true
disableKinds = ["taxonomy", "taxonomyTerm"]
[params]
favicon = "favicon.png"
googleAnalyticsId = "UA-130734531-1"
[params.versions]
latest = "3.0"
[params.description]
brief = "A distributed transactional key-value database"
long = "Based on the design of [Google Spanner](https://ai.google/research/pubs/pub39966) and [HBase](https://hbase.apache.org), but simpler to manage and without dependencies on any distributed filesystem"
[params.fonts]
sansserif = "Titillium Web"
monospace = "Inconsolata"
[params.logos]
white = "img/logos/horizontal/white/tikv-horizontal-white.png"
color = "img/logos/horizontal/color/tikv-horizontal-color.png"
black = "img/logos/horizontal/black/tikv-horizontal-black.png"
cncf = "img/logos/cncf-color.png"
card = "img/logos/card.png"
[params.assets]
fontAwesomeVersion = "5.3.1"
js = ["jquery-3.3.1", "anchor", "app"]
css = ["syntax"]
[params.socialmedia]
twitter = "tikvproject"
github = "https://github.com/tikv/tikv"
[outputs]
home = ["HTML", "REDIRECTS"]
[outputFormats.REDIRECTS]
mediaType = "text/netlify"
baseName = "_redirects"
[mediaTypes."text/netlify"]
delimiter = ""

config.yaml Normal file
View File

@@ -0,0 +1,61 @@
title: "TiKV"
baseURL: "https://tikv.org"
languageCode: "en-us"
pygmentsCodeFences: true
pygmentsUseClasses: true
disableKinds: ["taxonomy", "taxonomyTerm"]
menu:
  nav:
    - name: Forum
      url: https://forum.tikv.org
      parent: Community
      weight: 1
    - name: Chat
      url: /chat
      parent: Community
      weight: 2
    - name: Branding
      url: https://branding.cncf.io/projects/tikv
      parent: Community
      weight: 4
    - name: Docs
      url: /docs/3.0/concepts
      weight: 1
params:
  favicon: "favicon.png"
  googleAnalyticsId: "UA-130734531-1"
  versions:
    latest: "3.0"
  description:
    brief: "A distributed transactional key-value database"
    long: "Based on the design of [Google Spanner](https://ai.google/research/pubs/pub39966) and [HBase](https://hbase.apache.org), but simpler to manage and without dependencies on any distributed filesystem"
  fonts:
    sansserif: "Titillium Web"
    monospace: "Inconsolata"
  logos:
    white: "img/logos/horizontal/white/tikv-horizontal-white.png"
    color: "img/logos/horizontal/color/tikv-horizontal-color.png"
    black: "img/logos/horizontal/black/tikv-horizontal-black.png"
    cncf: "img/logos/cncf-color.png"
    card: "img/logos/card.png"
  assets:
    fontAwesomeVersion: "5.3.1"
    js: ["jquery-3.3.1", "anchor", "app"]
    css: ["syntax"]
  socialmedia:
    twitter: "tikvproject"
    github: "https://github.com/tikv/tikv"
outputs:
  home: ["HTML", "REDIRECTS"]
outputFormats:
  REDIRECTS:
    mediaType: "text/netlify"
    baseName: "_redirects"
mediaTypes:
  "text/netlify":
    delimiter: ""

View File

@@ -1,5 +1,9 @@
---
title: TiKV adopters
title: Adopters
menu:
  nav:
    parent: Community
    weight: 3
---
TiKV has been adopted by many companies across a wide range of industries. The table below lists many of them:

View File

@@ -1,9 +1,14 @@
---
title: TiKV overview
description: Some basic facts about TiKV
weight: 1
aliases:
  - /docs
menu:
  nav:
    name: Concepts
    parent: Docs
    weight: 1
  docs:
    name: Concepts
    weight: 1
---
**TiKV** is a distributed transactional key-value database originally created by [PingCAP](https://pingcap.com/en) to complement [TiDB](https://github.com/pingcap/tidb).

View File

@@ -1,9 +1,9 @@
---
title: APIs
description: Interact with TiKV using the raw key-value API or the transactional key-value API
weight: 4
aliases:
  - /docs/apis
menu:
  docs:
    parent: Concepts
---
TiKV offers two APIs that you can interact with:

View File

@@ -1,9 +1,9 @@
---
title: Architecture
description: How TiKV works and how it was built
weight: 2
aliases:
  - /docs/architecture
menu:
  docs:
    parent: Concepts
---
This page discusses the core concepts and architecture behind TiKV, including:

View File

@@ -0,0 +1,11 @@
---
title: Reference
headless: true
draft: true
menu:
  nav:
    parent: Docs
    weight: 3
  reference:
    weight: 4
---

View File

@@ -1,5 +1,4 @@
---
title: Clients
draft: true
weight: 2
---

View File

@@ -0,0 +1,7 @@
---
title: Tools
draft: true
menu:
  docs:
    parent: Reference
---

View File

@@ -1,7 +1,14 @@
---
title: Quickstart
description: Run TiKV in your local environment using Docker Compose
weight: 1
menu:
  nav:
    name: Tasks
    parent: Docs
    weight: 2
  docs:
    name: Tasks
    weight: 2
---
This guide describes how to quickly deploy a TiKV testing cluster using [Docker Compose](https://docs.docker.com/compose/) on a single machine. Currently, this installation method is supported only on Linux.

View File

@@ -1,7 +1,9 @@
---
title: Configure TiKV
description: Configure a wide range of TiKV facets, including RocksDB, gRPC, the Placement Driver, and more
weight: 3
menu:
  docs:
    parent: Tasks
---
## RocksDB configuration {#rocksdb}

View File

@@ -1,8 +1,10 @@
---
title: Install and deploy
description: Run TiKV using Ansible or Docker
weight: 2
draft: true
menu:
  docs:
    parent: Tasks
---
This document tells you how to install TiKV using:

View File

@@ -1,8 +1,8 @@
---
title: Secure TiKV
weight: 4
aliases:
  - /docs/security
menu:
  docs:
    parent: Tasks
---
This page discusses how to secure your TiKV deployment. Learn how to:

View File

@@ -1,5 +0,0 @@
---
title: Concepts
headless: true
weight: 1
---

View File

@@ -1,6 +1,9 @@
---
title: Byzantine Failure
weight: 2
menu:
  docs:
    parent: Consensus algorithm
    weight: 2
---
Consensus algorithms are typically either *Byzantine Fault Tolerant*, or not. Succinctly, systems which can withstand Byzantine faults are able to withstand misbehaving peers. Most distributed systems you would use inside of a VLAN, such as Kafka, TiKV, and etcd, are not Byzantine Fault Tolerant.

View File

@@ -1,6 +1,9 @@
---
title: CAP Theorem
weight: 1
menu:
  docs:
    parent: Consensus algorithm
    weight: 1
---
In 2000, Eric Brewer presented [“Towards Robust Distributed Systems”](http://awoc.wolski.fi/dlib/big-data/Brewer_podc_keynote_2000.pdf) which detailed the CAP Theorem. Succinctly, the theorem declares that a distributed system may only choose two of the following three attributes:

View File

@@ -1,6 +1,9 @@
---
title: Consensus algorithm
weight: 2
menu:
  docs:
    parent: Deep Dive
    weight: 2
---
When building a distributed system, one principal goal is often to build in fault-tolerance: if one particular node in a network goes down, or if there is a network partition, the system continues to operate. The cluster of nodes taking part in a distributed consensus protocol must come to agreement regarding values, and once that decision is reached, that choice is final, even if some nodes were in a faulty state at the time.

View File

@@ -1,6 +1,9 @@
---
title: Paxos
weight: 3
menu:
  docs:
    parent: Consensus algorithm
    weight: 3
---
Paxos is a protocol that Leslie Lamport and others have written extensively about. The most succinct paper describing Paxos is ["Paxos Made Simple"](https://lamport.azurewebsites.net/pubs/paxos-simple.pdf), published by Lamport in 2001. The original paper, ["The Part-Time Parliament"](http://lamport.azurewebsites.net/pubs/pubs.html#lamport-paxos), was written in 1989 but not published until 1998.

View File

@@ -1,6 +1,9 @@
---
title: Raft
weight: 4
menu:
  docs:
    parent: Consensus algorithm
    weight: 4
---
In 2014, Diego Ongaro and John Ousterhout presented the Raft algorithm. It is explained succinctly in a [paper](https://raft.github.io/raft.pdf) and detailed at length in a [thesis](https://ramcloud.stanford.edu/~ongaro/thesis.pdf).

View File

@@ -1,6 +1,9 @@
---
title: Distributed SQL
weight: 1
menu:
  docs:
    parent: Distributed SQL over TiKV
    weight: 2
---
By now we know how [TiDB]'s relational structure is encoded into a versioned key-value form. In this section, we will focus on the following questions:

View File

@@ -1,6 +1,9 @@
---
title: Distributed SQL over TiKV
weight: 8
menu:
  docs:
    parent: Deep Dive
    weight: 8
---
TiKV is the storage layer for [TiDB], a distributed HTAP SQL database. So far,

View File

@@ -1,6 +1,9 @@
---
title: Distributed Algorithms
weight: 2
menu:
  docs:
    parent: Distributed transaction
    weight: 2
---
## Two-Phase Commit

View File

@@ -1,6 +1,9 @@
---
title: Distributed transaction
weight: 4
menu:
  docs:
    parent: Deep Dive
    weight: 4
---
As TiKV is a distributed transactional key-value database, transactions are a core feature. In this chapter we will talk about general implementations of distributed transactions and some of the implementation details in TiKV.

View File

@@ -1,6 +1,9 @@
---
title: Isolation Level
weight: 1
menu:
  docs:
    parent: Distributed transaction
    weight: 1
---
Isolation is one of the ACID (Atomicity, Consistency, Isolation, Durability) properties. It determines how transaction integrity is visible to other users and systems. For example, when a user is creating a Purchase Order and has created the header, but not the Purchase Order lines, is the header available for other systems/users (carrying out concurrent operations, such as a report on Purchase Orders) to see?

View File

@@ -1,6 +1,9 @@
---
title: Locking
weight: 3
menu:
  docs:
    parent: Distributed transaction
    weight: 3
---
To prevent lost updates and dirty reads, locking is employed to manage the actions of multiple concurrent users on a database. The two types of locking are pessimistic locking and optimistic locking.

View File

@@ -1,81 +1,84 @@
---
title: Optimized Percolator
weight: 6
---
As described in the [previous chapter](../percolator), TiKV makes use of Percolator's transaction algorithm, with some optimizations added in TiKV's implementation. In this chapter, we will introduce these optimizations.
## Parallel Prewrite
In practice, for a single transaction, we don't want to do prewrites one by one. When there are dozens of TiKV nodes in the cluster, we hope the prewrite can be executed concurrently on these TiKV nodes.
In TiKV's implementation, when committing a transaction, the keys in the transaction will be divided into several batches and each batch will be prewritten in parallel. It doesn't matter whether the primary key is written first.
If a conflict happens during a transaction's prewrite phase, the prewrite process will be canceled and rollback will be performed on all keys affected by the transaction. Doing rollback on a key will leave a `Rollback` record in `CF_WRITE` (Percolator's `write` column), which is not described in Google's Percolator paper. The `Rollback` record is a mark to indicate that the transaction with `start_ts` in the record has been rolled back, and if a prewrite request arrives later than the rollback request, the prewrite will not succeed. This situation may be caused by network issues. The correctness won't be broken if we allow the prewrite to succeed. However, the key will be locked and unavailable until the lock's TTL expires.
## Short Value in Write Column
As mentioned in [Percolator in TiKV](../percolator/#percolator-in-tikv), TiKV uses RocksDB's column families to save the different columns of Percolator. Different column families of RocksDB are actually different LSM-Trees. When we access a value, we first need to search `CF_WRITE` to find the `start_ts` of the matching record, and then read the corresponding record in `CF_DEFAULT`. If a value is very small, it is wasteful to search RocksDB twice.
The optimization in TiKV is to avoid handling `CF_DEFAULT` for short values. If the value is short enough, it will not be put into `CF_DEFAULT` during the prewrite phase. Instead, it will be embedded in the lock and saved in `CF_LOCK`. Then in the commit phase, the value will be moved out of the lock and inlined in the write record. Therefore, we can access and manipulate short values without having to handle `CF_DEFAULT`.
## Point Read Without Timestamp
Timestamps are critical to providing isolation for transactions. For every transaction, we allocate a unique `start_ts`, and ensure that transaction T can only see the data committed before T's `start_ts`.
But if transaction T does nothing but read a single key, is it really necessary to allocate it a `start_ts`? The answer is no. We can simply read the newest version directly, because doing so is equivalent to reading with a `start_ts` that is exactly the instant the key is read. It's even OK to read a locked key, because that is equivalent to reading with a `start_ts` allocated before the lock's `start_ts`.
## Calculated Commit Timestamp
{{< warning >}}
This optimization hasn't been finished yet, but will be available in the future. [RFC](https://github.com/tikv/rfcs/pull/25).
{{</ warning >}}
To provide Snapshot Isolation, we must ensure all transactional reads are
repeatable. The `commit_ts` should be large enough so that the transaction will
not be committed before a valid read. Otherwise, Repeatable Read will be broken.
For example:
1. Txn1 gets `start_ts` 100
2. Txn2 gets `start_ts` 200
3. Txn2 reads key `"k1"` and gets value `"1"`
4. Txn1 prewrites `"k1"` with value `"2"`
5. Txn1 commits with `commit_ts` 101
6. Txn2 reads key `"k1"` and gets value `"2"`
Txn2 reads `"k1"` twice but gets two different results. If `commit_ts` is
allocated from PD, this cannot happen, because Txn2's first read must happen
before Txn1's prewrite, while Txn1's `commit_ts` must be requested after
finishing the prewrite. As a result, Txn1's `commit_ts` must be larger than
Txn2's `start_ts`.
On the other hand, `commit_ts` can't be arbitrarily large. If the `commit_ts` is
ahead of the actual time, the committed data may be unreadable by other new
transactions, which breaks integrity. We are not sure whether a timestamp is
ahead of the actual time if we don't ask PD.
To conclude, in order not to break the Snapshot Isolation and the integrity, a
valid range for `commit_ts` should be:
```text
max{start_ts, max_read_ts_of_written_keys} < commit_ts <= now
```
So here comes a method to calculate the commit_ts:
```text
commit_ts = max{start_ts, region_1_max_read_ts, region_2_max_read_ts, ...} + 1
```
where `region_N_max_read_ts` is the maximum timestamp of all reads on the
region, for all regions involved in the transaction.
## Single Region 1PC
{{< warning >}}
This optimization hasn't been finished yet, but will be available in the future.
{{</ warning >}}
For non-distributed databases, it's easy to provide ACID transactions; but for distributed databases, usually 2PC (two-phase commit) is required to make transactions ACID. Percolator provides such a 2PC algorithm, which is adopted by TiKV.
---
title: Optimized Percolator
menu:
  docs:
    parent: Distributed transaction
    weight: 6
---
As described in the [previous chapter](../percolator), TiKV makes use of Percolator's transaction algorithm, with some optimizations added in TiKV's implementation. In this chapter, we will introduce these optimizations.
## Parallel Prewrite
In practice, for a single transaction, we don't want to do prewrites one by one. When there are dozens of TiKV nodes in the cluster, we hope the prewrite can be executed concurrently on these TiKV nodes.
In TiKV's implementation, when committing a transaction, the keys in the transaction will be divided into several batches and each batch will be prewritten in parallel. It doesn't matter whether the primary key is written first.
If a conflict happens during a transaction's prewrite phase, the prewrite process will be canceled and rollback will be performed on all keys affected by the transaction. Doing rollback on a key will leave a `Rollback` record in `CF_WRITE` (Percolator's `write` column), which is not described in Google's Percolator paper. The `Rollback` record is a mark to indicate that the transaction with `start_ts` in the record has been rolled back, and if a prewrite request arrives later than the rollback request, the prewrite will not succeed. This situation may be caused by network issues. The correctness won't be broken if we allow the prewrite to succeed. However, the key will be locked and unavailable until the lock's TTL expires.
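To make this concrete, here is a minimal sketch of parallel prewrite using plain threads; `prewrite_batch` is a hypothetical stand-in for the real prewrite RPC, not TiKV's actual code:
```rust
use std::thread;

// Hypothetical stand-in for the prewrite RPC sent to the TiKV node that
// owns `batch`; returns Err on a write conflict.
fn prewrite_batch(batch: Vec<Vec<u8>>, start_ts: u64) -> Result<(), String> {
    let _ = (batch, start_ts);
    Ok(())
}

fn parallel_prewrite(batches: Vec<Vec<Vec<u8>>>, start_ts: u64) -> Result<(), String> {
    // Prewrite every batch concurrently; the primary key needs no special order.
    let handles: Vec<_> = batches
        .into_iter()
        .map(|batch| thread::spawn(move || prewrite_batch(batch, start_ts)))
        .collect();
    for handle in handles {
        // Any conflict cancels the prewrite; the caller must then roll back
        // all affected keys, leaving the `Rollback` records described above.
        handle.join().expect("prewrite thread panicked")?;
    }
    Ok(())
}
```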
## Short Value in Write Column
As mentioned in [Percolator in TiKV](../percolator/#percolator-in-tikv), TiKV uses RocksDB's column families to save the different columns of Percolator. Different column families of RocksDB are actually different LSM-Trees. When we access a value, we first need to search `CF_WRITE` to find the `start_ts` of the matching record, and then read the corresponding record in `CF_DEFAULT`. If a value is very small, it is wasteful to search RocksDB twice.
The optimization in TiKV is to avoid handling `CF_DEFAULT` for short values. If the value is short enough, it will not be put into `CF_DEFAULT` during the prewrite phase. Instead, it will be embedded in the lock and saved in `CF_LOCK`. Then in the commit phase, the value will be moved out of the lock and inlined in the write record. Therefore, we can access and manipulate short values without having to handle `CF_DEFAULT`.
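As an illustration, the prewrite-time decision might look like this sketch; the types are simplified stand-ins, and the 64-byte threshold is an assumption mirroring TiKV's short-value limit:
```rust
const SHORT_VALUE_MAX_LEN: usize = 64; // assumed threshold

// Simplified stand-in for a Percolator lock; the real one carries more fields.
struct Lock {
    start_ts: u64,
    short_value: Option<Vec<u8>>, // inlined value for short writes
}

fn prewrite_value(key: &[u8], value: Vec<u8>, lock: &mut Lock) {
    if value.len() <= SHORT_VALUE_MAX_LEN {
        // Short value: embed it in the lock (CF_LOCK). At commit time it is
        // moved into the write record (CF_WRITE), so CF_DEFAULT is never read.
        lock.short_value = Some(value);
    } else {
        // Long value: store it in CF_DEFAULT keyed by (key, start_ts).
        write_default_cf(key, lock.start_ts, value);
    }
}

fn write_default_cf(_key: &[u8], _start_ts: u64, _value: Vec<u8>) {
    // ... RocksDB write to the default column family ...
}
```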
## Point Read Without Timestamp
Timestamps are critical to providing isolation for transactions. For every transaction, we allocate a unique `start_ts`, and ensure that transaction T can only see the data committed before T's `start_ts`.
But if transaction T does nothing but read a single key, is it really necessary to allocate it a `start_ts`? The answer is no. We can simply read the newest version directly, because doing so is equivalent to reading with a `start_ts` that is exactly the instant the key is read. It's even OK to read a locked key, because that is equivalent to reading with a `start_ts` allocated before the lock's `start_ts`.
## Calculated Commit Timestamp
{{< warning >}}
This optimization hasn't been finished yet, but will be available in the future. [RFC](https://github.com/tikv/rfcs/pull/25).
{{</ warning >}}
To provide Snapshot Isolation, we must ensure all transactional reads are
repeatable. The `commit_ts` should be large enough so that the transaction will
not be committed before a valid read. Otherwise, Repeatable Read will be broken.
For example:
1. Txn1 gets `start_ts` 100
2. Txn2 gets `start_ts` 200
3. Txn2 reads key `"k1"` and gets value `"1"`
4. Txn1 prewrites `"k1"` with value `"2"`
5. Txn1 commits with `commit_ts` 101
6. Txn2 reads key `"k1"` and gets value `"2"`
Txn2 reads `"k1"` twice but gets two different results. If `commit_ts` is
allocated from PD, this cannot happen, because Txn2's first read must happen
before Txn1's prewrite, while Txn1's `commit_ts` must be requested after
finishing the prewrite. As a result, Txn1's `commit_ts` must be larger than
Txn2's `start_ts`.
On the other hand, `commit_ts` can't be arbitrarily large. If the `commit_ts` is
ahead of the actual time, the committed data may be unreadable by other new
transactions, which breaks integrity. We are not sure whether a timestamp is
ahead of the actual time if we don't ask PD.
To conclude, in order not to break the Snapshot Isolation and the integrity, a
valid range for `commit_ts` should be:
```text
max{start_ts, max_read_ts_of_written_keys} < commit_ts <= now
```
So here comes a method to calculate the commit_ts:
```text
commit_ts = max{start_ts, region_1_max_read_ts, region_2_max_read_ts, ...} + 1
```
where `region_N_max_read_ts` is the maximum timestamp of all reads on the
region, for all regions involved in the transaction.
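A sketch of that calculation, assuming the transaction has collected the maximum read timestamp of every Region it wrote to:
```rust
/// commit_ts = max{start_ts, region_1_max_read_ts, region_2_max_read_ts, ...} + 1
fn calculated_commit_ts(start_ts: u64, region_max_read_ts: &[u64]) -> u64 {
    region_max_read_ts.iter().copied().fold(start_ts, u64::max) + 1
}

// e.g. start_ts = 100, written Regions saw reads at ts 150 and 120:
// calculated_commit_ts(100, &[150, 120]) == 151
```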
## Single Region 1PC
{{< warning >}}
This optimization hasn't been finished yet, but will be available in the future.
{{</ warning >}}
For non-distributed databases, it's easy to provide ACID transactions; but for distributed databases, usually 2PC (two-phase commit) is required to make transactions ACID. Percolator provides such a 2PC algorithm, which is adopted by TiKV.
Considering that write batches are applied atomically within a single Region, we realize that if a transaction affects only one Region, 2PC is actually unnecessary. As long as there is no write conflict, the transaction can be committed directly. Based on [the previous optimization](#calculated-commit-timestamp), the `commit_ts` can be derived from the Region's `max_read_ts` directly. In this way, we save an RPC round trip and a write operation (including a Raft commit and a RocksDB write) in TiKV for single-Region transactions.
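A rough sketch of that decision (the helper functions are hypothetical stand-ins for TiKV internals):
```rust
type RegionId = u64;
type Key = Vec<u8>;

fn max_read_ts(_region: RegionId) -> u64 { 0 } // tracked per Region
fn one_phase_commit(_r: RegionId, _keys: &[Key], _commit_ts: u64) { /* one atomic batch */ }
fn two_phase_commit(_batches: &[(RegionId, Vec<Key>)], _start_ts: u64) { /* Percolator 2PC */ }

fn commit(keys_by_region: Vec<(RegionId, Vec<Key>)>, start_ts: u64) {
    if let [(region, keys)] = keys_by_region.as_slice() {
        // Single Region: skip 2PC and apply one atomic write batch, deriving
        // commit_ts from this Region's max read ts (see the previous section).
        let commit_ts = max_read_ts(*region).max(start_ts) + 1;
        one_phase_commit(*region, keys, commit_ts);
    } else {
        // Multiple Regions: fall back to Percolator-style 2PC.
        two_phase_commit(&keys_by_region, start_ts);
    }
}
```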

View File

@@ -1,6 +1,9 @@
---
title: Percolator
weight: 5
menu:
  docs:
    parent: Distributed transaction
    weight: 5
---
TiKV's support for distributed transactions is inspired by Google's [Percolator](https://ai.google/research/pubs/pub36726.pdf). In this section, we will briefly introduce Percolator and how we make use of it in TiKV.

View File

@@ -1,6 +1,9 @@
---
title: Timestamp Oracle
weight: 4
menu:
  docs:
    parent: Distributed transaction
    weight: 4
---
The timestamp oracle plays a significant role in the Percolator transaction model. It is a server that hands out timestamps in strictly increasing order, a property required for correct operation of the snapshot isolation protocol.

View File

@@ -1,7 +1,11 @@
---
title: Introduction
headless: true
weight: 1
title: Deep Dive
menu:
  docs:
    weight: 3
  nav:
    parent: Docs
    weight: 3
---
[TiKV](https://github.com/tikv/tikv) is a distributed, transactional key-value database. It has been widely adopted in many critical production environments &mdash; see the [TiKV adopters](https://github.com/tikv/tikv/blob/master/docs/adopters.md). It was also accepted by the [Cloud Native Computing Foundation](https://www.cncf.io) as a [Sandbox project](https://www.cncf.io/blog/2018/08/28/cncf-to-host-tikv-in-the-sandbox/) in August 2018.

View File

@@ -1,7 +1,9 @@
---
title: B-Tree vs LSM-Tree
mathjax: true
weight: 1
menu:
  docs:
    parent: Key-value engine
    weight: 1
---
The [B-tree](https://en.wikipedia.org/wiki/B-tree) and the [Log-Structured Merge-tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree) (LSM-tree) are the two most widely used data structures for data-intensive applications to organize and store data. However, each of them has its own advantages and disadvantages. This article aims to use quantitative approaches to compare these two data structures.

View File

@@ -1,6 +1,9 @@
---
title: Key-value engine
weight: 3
menu:
  docs:
    parent: Deep Dive
    weight: 3
---
A key-value engine serves as the bottommost layer in a key-value

View File

@@ -1,6 +1,9 @@
---
title: RocksDB
weight: 2
menu:
  docs:
    parent: Key-value engine
    weight: 2
---
RocksDB is a persistent key-value store for fast storage environments.

View File

@@ -1,6 +1,9 @@
---
title: Resource scheduling
weight: 7
menu:
  docs:
    parent: Deep Dive
    weight: 7
---
In a distributed database environment, resource scheduling needs to meet the following requirements:

View File

@@ -1,6 +1,9 @@
---
title: Kubernetes
weight: 1
menu:
  docs:
    parent: Resource scheduling
    weight: 1
---
Kubernetes is a Docker-based, open-source container cluster management system initiated and maintained by the Google team. It supports not only common cloud platforms but also internal data centers.

View File

@@ -1,6 +1,9 @@
---
title: Mesos
weight: 2
menu:
  docs:
    parent: Resource scheduling
    weight: 2
---
Mesos was originally launched by UC Berkeley's AMPLab in 2009. It is licensed under the Apache License and now operated by Mesosphere, Inc.

View File

@@ -1,92 +1,95 @@
---
title: Simulator
weight: 3
---
With its flexibility and significant benefits of reducing time and cost, the
simulator plays an important role in studying and designing a computer
architecture. It is often used to validate specific design schemes
and evaluate their effectiveness.
## Workflow
We'll focus on how the simulator is used in TiKV to deal with scheduling
problems.
In general, when there is a lack of resources or the problem is hard to
reproduce, we might consider using the simulator.
The simulation of scheduling problems in a distributed system
usually consists of the following steps:
1. Define the system model of the simulator.
2. Set up the simulation environment.
3. Run the simulation.
4. Inspect the result to check whether it is in line with expectations.
The first step is mainly to figure out which part of your system you want to
simulate. And the model should be as simple as possible. In the second step,
you should set up the environment including the scale of your system and the
characteristics of the workload. In the third step, the simulation will run
and provide the scheduling output. In the final step, you can check the
result and dig into the scheduling problems if the result is not as expected.
## PD Simulator
In PD, we also need a simulator to locate a scheduling problem.
The PD simulator can be used to simulate a large-scale cluster and scenarios
with different users.
For some special scenarios, we can keep their cases in the simulator so that
we can quickly verify the correctness of the scheduling in PD under different
scenarios when we reconstruct the code or add some new features in the future.
Without the simulator, if we want to reproduce some scenarios, we need to apply
for machines, load data, and then wait for the scheduling. It is tedious and
might waste a lot of time.
### Architecture
{{< figure
src="/img/deep-dive/pd-simulator.png"
caption="PD Simulator Architecture"
number="1" >}}
### Components
PD Simulator consists of the following components:
- Driver
  _Driver_ is the most important part of the PD Simulator. It is used to do
  some initialization and trigger the heartbeat and the corresponding event
  according to the tick count.
- Node
  _Node_ is used to simulate a TiKV node. It contains the basic information
  of a store and can communicate with PD by using the heartbeat through gRPC.
- Raft Engine
  _Raft Engine_ records all Raft-related information. It is a shared Raft
  engine that PD knows nothing about.
- Event Runner
  For every tick, _Event Runner_ checks whether there is an event to execute.
  If there is, it executes the corresponding event.
### Process
The basic process of how PD Simulator works is as follows:
1. When started, PD Simulator will create a driver and initialize a mocked
TiKV cluster which consists of nodes.
2. After PD is bootstrapped, it starts a timer.
3. For each tick, the mocked TiKV cluster will perform some operations, such
as executing Raft commands on the shared Raft engine or sending heartbeats.
The operation to perform depends on the specific case.
4. Finally, PD Simulator will verify whether the result is in line with our
expectations.
PD Simulator does not care about how TiKV actually works in detail. It just
sends the messages that PD expects to receive.
---
title: Simulator
menu:
  docs:
    parent: Resource scheduling
    weight: 3
---
With its flexibility and significant benefits of reducing time and cost, the
simulator plays an important role in studying and designing a computer
architecture. It is often used to validate specific design schemes
and evaluate their effectiveness.
## Workflow
We'll focus on how the simulator is used in TiKV to deal with scheduling
problems.
In general, when there is a lack of resources or the problem is hard to
reproduce, we might consider using the simulator.
The simulation of scheduling problems in a distributed system
usually consists of the following steps:
1. Define the system model of the simulator.
2. Set up the simulation environment.
3. Run the simulation.
4. Inspect the result to check whether it is in line with expectations.
The first step is mainly to figure out which part of your system you want to
simulate. And the model should be as simple as possible. In the second step,
you should set up the environment including the scale of your system and the
characteristics of the workload. In the third step, the simulation will run
and provide the scheduling output. In the final step, you can check the
result and dig into the scheduling problems if the result is not as expected.
## PD Simulator
In PD, we also need a simulator to locate a scheduling problem.
The PD simulator can be used to simulate a large-scale cluster and scenarios
with different users.
For some special scenarios, we can keep their cases in the simulator so that
we can quickly verify the correctness of the scheduling in PD under different
scenarios when we reconstruct the code or add some new features in the future.
Without the simulator, if we want to reproduce some scenarios, we need to apply
for machines, load data, and then wait for the scheduling. It is tedious and
might waste a lot of time.
### Architecture
{{< figure
src="/img/deep-dive/pd-simulator.png"
caption="PD Simulator Architecture"
number="1" >}}
### Components
PD Simulator consists of the following components:
- Driver
  _Driver_ is the most important part of the PD Simulator. It is used to do
  some initialization and trigger the heartbeat and the corresponding event
  according to the tick count.
- Node
  _Node_ is used to simulate a TiKV node. It contains the basic information
  of a store and can communicate with PD by using the heartbeat through gRPC.
- Raft Engine
  _Raft Engine_ records all Raft-related information. It is a shared Raft
  engine that PD knows nothing about.
- Event Runner
  For every tick, _Event Runner_ checks whether there is an event to execute.
  If there is, it executes the corresponding event.
### Process
The basic process of how PD Simulator works is as follows:
1. When started, PD Simulator will create a driver and initialize a mocked
TiKV cluster which consists of nodes.
2. After PD is bootstrapped, it starts a timer.
3. For each tick, the mocked TiKV cluster will perform some operations, such
as executing Raft commands on the shared Raft engine or sending heartbeats.
The operation to perform depends on the specific case.
4. Finally, PD Simulator will verify whether the result is in line with our
expectations.
PD Simulator does not care about how TiKV actually works in detail. It just
sends the messages that PD expects to receive.
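To picture the tick-driven flow, here is a tiny illustrative loop; the names are invented for this sketch and are not PD Simulator's real API:
```rust
struct Node {
    store_id: u64,
}

impl Node {
    // Each tick, a mocked node sends the heartbeats PD expects to see.
    fn heartbeat(&self, tick: u64) {
        println!("store {} heartbeat at tick {}", self.store_id, tick);
    }
}

fn main() {
    // Driver setup: a mocked cluster of three nodes.
    let nodes: Vec<Node> = (1..=3).map(|store_id| Node { store_id }).collect();
    // The driver's timer: on every tick, nodes act, then case-specific
    // events (the Event Runner's job) would fire.
    for tick in 0..5 {
        for node in &nodes {
            node.heartbeat(tick);
        }
        // event_runner.run(tick); // case-specific events go here
    }
}
```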

View File

@@ -1,47 +1,50 @@
---
title: gRPC
weight: 3
---
Sitting on top of HTTP/2 and protocol buffers, gRPC provides a high-performance
remote procedure call (RPC) framework. It supports features such as
bi-directional streaming and authentication.
gRPC is also part of the [CNCF](https://www.cncf.io/projects/). It is commonly
used in applications like Kubernetes or TiKV.
## Servers and Stubs
gRPC works off the idea that there is a *server* node which can accept
*requests* from client nodes that use a *stub*.
The *server* implements the service interface and runs a gRPC server to handle
all the incoming RPC calls from the client *stubs*. The client *stubs* have the
same methods that the server implements, but instead send the
request to the server.
{{< figure
src="/img/deep-dive/grpc-server-stub.svg"
caption="GRPC Servers and Stubs"
number="1" >}}
## Streams
Sometimes it's convenient to return or accept a stream of messages, instead of
operating in the traditional 1:1 request-response format.
gRPC supports both traditional 1:1 and streaming modes, enabling bi-directional
streaming so clients and servers can write in whatever order they like. gRPC
maintains ordering behind the scenes.
```protobuf
// `ScanRequest` and `KvPair` stand in for message types defined elsewhere;
// protobuf RPCs take and return message types, not bare scalars.
service StreamScan {
  rpc BatchGet (ScanRequest) returns (stream KvPair);
}
```
## A Rust Crate
One of the TiKV maintaining organizations, PingCAP, maintains the
[`grpc-rs`](https://github.com/pingcap/grpc-rs) library, providing Rust bindings
to a C library for gRPC. TiKV uses this crate.
---
title: gRPC
menu:
  docs:
    parent: Remote Procedure Calls (RPC)
    weight: 3
---
Sitting on top of HTTP/2 and protocol buffers, gRPC provides a high-performance
remote procedure call (RPC) framework. It supports features such as
bi-directional streaming and authentication.
gRPC is also part of the [CNCF](https://www.cncf.io/projects/). It is commonly
used in applications like Kubernetes or TiKV.
## Servers and Stubs
gRPC works off the idea that there is a *server* node which can accept
*requests* from client nodes that use a *stub*.
The *server* implements the service interface and runs a gRPC server to handle
all the incoming RPC calls from the client *stubs*. The client *stubs* have the
same methods that the server implements, but instead send the
request to the server.
{{< figure
src="/img/deep-dive/grpc-server-stub.svg"
caption="GRPC Servers and Stubs"
number="1" >}}
## Streams
Sometimes it's convenient to return or accept a stream of messages, instead of
operating in the traditional 1:1 request-response format.
gRPC supports both traditional 1:1 and streaming modes, enabling bi-directional
streaming so clients and servers can write in whatever order they like. gRPC
maintains ordering behind the scenes.
```protobuf
// `ScanRequest` and `KvPair` stand in for message types defined elsewhere;
// protobuf RPCs take and return message types, not bare scalars.
service StreamScan {
  rpc BatchGet (ScanRequest) returns (stream KvPair);
}
```
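On the client side, consuming such a server stream might look roughly like the sketch below. It assumes a `StreamScanClient` generated by grpc-rs from the service above; the method name, request helper, and stream handling are illustrative assumptions, not verified API:
```rust
use std::sync::Arc;
use futures::executor::block_on;
use futures::StreamExt;
use grpcio::{ChannelBuilder, EnvBuilder};

fn main() {
    // Connect a channel to a hypothetical server address.
    let env = Arc::new(EnvBuilder::new().build());
    let channel = ChannelBuilder::new(env).connect("127.0.0.1:20160");
    let client = StreamScanClient::new(channel); // generated from the .proto

    // A server-streaming RPC hands back a stream of messages; gRPC
    // preserves their ordering behind the scenes.
    let mut kv_stream = client.batch_get(&scan_request()).expect("rpc failed");
    block_on(async {
        while let Some(pair) = kv_stream.next().await {
            println!("{:?}", pair);
        }
    });
}
```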
## A Rust Crate
One of the TiKV maintaining organizations, PingCAP, maintains the
[`grpc-rs`](https://github.com/pingcap/grpc-rs) library, providing Rust bindings
to a C library for gRPC. TiKV uses this crate.

View File

@@ -1,95 +1,98 @@
---
title: HTTP/2
weight: 1
---
HTTP/2 is short for 'Hyper Text Transfer Protocol Version 2'. HTTP/2 and its predecessor,
HTTP/1, are the de-facto network protocols in use today.
They define things such as:
* [URIs]
* [Sessions]
* [Status Codes] (eg. 404 Not Found)
* [Methods] (eg. GET, POST, PUT)
* [Headers] (eg. Authorization, User-Agent)
* [Pipelining]
HTTP/2 evolved out of Google's experimental SPDY protocol as a successor to
HTTP/1.1. Since it maintains high-level compatibility with HTTP/1.1, let's first
take a look at HTTP/1.x.
## HTTP/1.x
HTTP was originally developed at CERN in 1989. [IETF RFC 1945] in 1996 was the first
officially recognized HTTP/1.0 version. Just a few years later, HTTP/1.1 was
specified in [IETF RFC 2068]. The standard was improved in [IETF RFC 2616] in 1999.
Both of these specifications were eventually obsoleted in 2014 by the following RFCs:
* [RFC 7230, HTTP/1.1: Message Syntax and Routing](https://tools.ietf.org/html/rfc7230)
* [RFC 7231, HTTP/1.1: Semantics and Content](https://tools.ietf.org/html/rfc7231)
* [RFC 7232, HTTP/1.1: Conditional Requests](https://tools.ietf.org/html/rfc7232)
* [RFC 7233, HTTP/1.1: Range Requests](https://tools.ietf.org/html/rfc7233)
* [RFC 7234, HTTP/1.1: Caching](https://tools.ietf.org/html/rfc7234)
* [RFC 7235, HTTP/1.1: Authentication](https://tools.ietf.org/html/rfc7235)
Thankfully it's not necessary to become familiar with every detail of these
RFCs. HTTP/1.0 is a request-response protocol. For now, we'll just focus on this.
### The Request
When a node is connected to another node via TCP/IP, it can make a request by
sending ASCII text as below (the empty newline is required!):
```HTTP
POST / HTTP/1.1
Host: tikv.org

{ data: "Test" }
```
Common headers are things like `Authorization` (for access control), and
`Cache-Control` (for caching).
### The Response
The recipient of a message responds in a similar format:
```HTTP
HTTP/1.1 200 OK
Content-Type: application/json; charset=UTF-8
Connection: close

{ data: "bar" }
```
## HTTP/2
The major differences in HTTP/2 do not lie in aspects like methods, status codes, or
URIs, but in data frames and transportation.
It builds on HTTP/1.1 by adding features like:
* Server Push, to allow the server to respond with more data than requested.
* Multiplexing, to avoid the 'head-of-line' blocking problem of HTTP/1.1
* Pipelining, to reduce network wait time.
* Compression of headers, to reduce overall network costs.
Compared to HTTP/1.1, HTTP/2.0 offers applications like TiKV many opportunities
for performance gains.
[IETF RFC 1945]: https://tools.ietf.org/html/rfc1945
[IETF RFC 2068]: https://tools.ietf.org/html/rfc2068
[IETF RFC 2616]: https://tools.ietf.org/html/rfc2616
[IETF RFC 7230]: https://tools.ietf.org/html/rfc7230
[IETF RFC 7231]: https://tools.ietf.org/html/rfc7231
[IETF RFC 7232]: https://tools.ietf.org/html/rfc7232
[IETF RFC 7233]: https://tools.ietf.org/html/rfc7233
[IETF RFC 7234]: https://tools.ietf.org/html/rfc7234
[IETF RFC 7235]: https://tools.ietf.org/html/rfc7235
[URIs]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier
[Sessions]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#HTTP_session
[Status Codes]: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
[Methods]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods
[Headers]: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
---
title: HTTP/2
menu:
  docs:
    parent: Remote Procedure Calls (RPC)
    weight: 1
---
HTTP/2 is short for 'Hyper Text Transfer Protocol Version 2'. HTTP/2 and its predecessor,
HTTP/1, are the de-facto network protocols in use today.
They define things such as:
* [URIs]
* [Sessions]
* [Status Codes] (eg. 404 Not Found)
* [Methods] (eg. GET, POST, PUT)
* [Headers] (eg. Authorization, User-Agent)
* [Pipelining]
HTTP/2 evolved out of Google's experimental SPDY protocol as a successor to
HTTP/1.1. Since it maintains high-level compatibility with HTTP/1.1, let's first
take a look at HTTP/1.x.
## HTTP/1.x
HTTP was originally developed at CERN in 1989. [IETF RFC 1945] in 1996 was the first
officially recognized HTTP/1.0 version. Just a few years later, HTTP/1.1 was
specified in [IETF RFC 2068]. The standard was improved in [IETF RFC 2616] in 1999.
Both of these specifications were eventually obsoleted in 2014 by the following RFCs:
* [RFC 7230, HTTP/1.1: Message Syntax and Routing](https://tools.ietf.org/html/rfc7230)
* [RFC 7231, HTTP/1.1: Semantics and Content](https://tools.ietf.org/html/rfc7231)
* [RFC 7232, HTTP/1.1: Conditional Requests](https://tools.ietf.org/html/rfc7232)
* [RFC 7233, HTTP/1.1: Range Requests](https://tools.ietf.org/html/rfc7233)
* [RFC 7234, HTTP/1.1: Caching](https://tools.ietf.org/html/rfc7234)
* [RFC 7235, HTTP/1.1: Authentication](https://tools.ietf.org/html/rfc7235)
Thankfully it's not necessary to become familiar with every detail of these
RFCs. HTTP/1.0 is a request-response protocol. For now, we'll just focus on this.
### The Request
When a node is connected to another node via TCP/IP, it can make a request by
sending ASCII text as below (the empty newline is required!):
```HTTP
POST / HTTP/1.1
Host: tikv.org

{ data: "Test" }
```
Common headers are things like `Authorization` (for access control), and
`Cache-Control` (for caching).
### The Response
The recipient of a message responds in a similar format:
```HTTP
HTTP/1.1 200 OK
Content-Type: application/json; charset=UTF-8
Connection: close

{ data: "bar" }
```
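To see this text-based request/response cycle concretely, here is a minimal sketch using only the Rust standard library (the host and path are just examples):
```rust
use std::io::{Read, Write};
use std::net::TcpStream;

fn main() -> std::io::Result<()> {
    // HTTP/1.1 is plain text over a TCP connection.
    let mut stream = TcpStream::connect("tikv.org:80")?;

    // The blank line (\r\n\r\n) separates the headers from the (empty) body.
    let request = "GET / HTTP/1.1\r\nHost: tikv.org\r\nConnection: close\r\n\r\n";
    stream.write_all(request.as_bytes())?;

    // The status line, headers, and body come back as plain text too.
    let mut response = String::new();
    stream.read_to_string(&mut response)?;
    println!("{}", response);
    Ok(())
}
```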
## HTTP/2
The major differences in HTTP/2 do not lie in aspects like methods, status codes, or
URIs, but in data frames and transportation.
It builds on HTTP/1.1 by adding features like:
* Server Push, to allow the server to respond with more data than requested.
* Multiplexing, to avoid the 'head-of-line' blocking problem of HTTP/1.1
* Pipelining, to reduce network wait time.
* Compression of headers, to reduce overall network costs.
Compared to HTTP/1.1, HTTP/2.0 offers applications like TiKV many opportunities
for performance gains.
[IETF RFC 1945]: https://tools.ietf.org/html/rfc1945
[IETF RFC 2068]: https://tools.ietf.org/html/rfc2068
[IETF RFC 2616]: https://tools.ietf.org/html/rfc2616
[IETF RFC 7230]: https://tools.ietf.org/html/rfc7230
[IETF RFC 7231]: https://tools.ietf.org/html/rfc7231
[IETF RFC 7232]: https://tools.ietf.org/html/rfc7232
[IETF RFC 7233]: https://tools.ietf.org/html/rfc7233
[IETF RFC 7234]: https://tools.ietf.org/html/rfc7234
[IETF RFC 7235]: https://tools.ietf.org/html/rfc7235
[URIs]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier
[Sessions]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#HTTP_session
[Status Codes]: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
[Methods]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods
[Headers]: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
[Pipelining]: https://en.wikipedia.org/wiki/HTTP_pipelining

View File

@@ -1,66 +1,69 @@
---
title: Remote Procedure Calls (RPC)
weight: 6
---
Communication between services occurs over remote procedure calls. RPCs happen
all the time in distributed systems. To obtain a webpage, your browser has to make at
least one RPC to this website.
TiKV, as a distributed system involving a number of nodes, uses RPCs to
communicate between nodes, as well as the Placement Driver and clients.
As it turns out, exposing functionality as a remote interface isn't trivial.
Networks are unreliable, systems are diverse and evolving, a huge variety of
languages and formats exist, and even things like encoding are hard!
## On the shoulders of giants
Over the past decades the field of computing has largely settled on a few common
standards.
### Network Protocols
The vast majority of services work over the HTTP or HTTP/2 network protocols.
This solves problems such as:
* [URIs]
* [Sessions]
* [Status Codes] (e.g. 404 Not Found)
* [Methods] (e.g. GET, POST, PUT)
* [Headers] (e.g. Authorization, User-Agent)
* [Pipelining]
TiKV uses **HTTP/2**. HTTP/2 is more performant and capable than HTTP/1 for TiKV's uses.
### Interfacing
With those abilities supported, there remains a need to work with structures of
data. Commonly this ends up being an *interface description language* format
like [Protocol Buffers] (protobufs), which TiKV uses. Unlike an *interchange*
*format*, an *interface description language* allows for the definition of
services and RPCs in addition to just data interchange. This solves the
following problems:
* [Encoding] (ASCII? UTF-8? UTF-16? WTF-8?)
* Serialization/Deserialization format (Text-to-`struct` and vice versa)
* Backward/Forward compatibility (e.g. Structure fields changing, being added, removed)
* Service & RPC definition
### Wrapping it all together
Simply having the pieces is not enough. Making it usable for all parties
involved is another story. [gRPC] does a great job wrapping
up the above technologies and providing usable interfaces.
Over the next chapter, we'll look at each of these technologies and how they work.
[gRPC]: https://grpc.io/
[Encoding]: https://en.wikipedia.org/wiki/Character_encoding
[Protocol Buffers]: https://developers.google.com/protocol-buffers/
[URIs]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier
[Sessions]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#HTTP_session
[Status Codes]: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
[Methods]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods
[Headers]: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
---
title: Remote Procedure Calls (RPC)
menu:
  docs:
    parent: Deep Dive
    weight: 6
---
Communication between services occurs over remote procedure calls. RPCs happen
all the time in distributed systems. To obtain a webpage, your browser has to make at
least one RPC to this website.
TiKV, as a distributed system involving a number of nodes, uses RPCs to
communicate between nodes, as well as the Placement Driver and clients.
As it turns out, exposing functionality as a remote interface isn't trivial.
Networks are unreliable, systems are diverse and evolving, a huge variety of
languages and formats exist, and even things like encoding are hard!
## On the shoulders of giants
Over the past decades the field of computing has largely settled on a few common
standards.
### Network Protocols
The vast majority of services work over the HTTP or HTTP/2 network protocols.
This solves problems such as:
* [URIs]
* [Sessions]
* [Status Codes] (e.g. 404 Not Found)
* [Methods] (e.g. GET, POST, PUT)
* [Headers] (e.g. Authorization, User-Agent)
* [Pipelining]
TiKV uses **HTTP/2**. HTTP/2 is more performant and capable than HTTP/1 for TiKV's uses.
### Interfacing
With those abilities supported, there remains a need to work with structures of
data. Commonly this ends up being an *interface description language* format
like [Protocol Buffers] (protobufs), which TiKV uses. Unlike an *interchange*
*format*, an *interface description language* allows for the definition of
services and RPCs in addition to just data interchange. This solves the
following problems:
* [Encoding] (ASCII? UTF-8? UTF-16? WTF-8?)
* Serialization/Deserialization format (Text-to-`struct` and vice versa)
* Backward/Forward compatibility (e.g. Structure fields changing, being added, removed)
* Service & RPC definition
### Wrapping it all together
Simply having the pieces is not enough. Making it usable for all parties
involved is another story. [gRPC] does a great job wrapping
up the above technologies and providing usable interfaces.
Over the next chapter, we'll look at each of these technologies and how they work.
[gRPC]: https://grpc.io/
[Encoding]: https://en.wikipedia.org/wiki/Character_encoding
[Protocol Buffers]: https://developers.google.com/protocol-buffers/
[URIs]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier
[Sessions]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#HTTP_session
[Status Codes]: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
[Methods]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods
[Headers]: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
[Pipelining]: https://en.wikipedia.org/wiki/HTTP_pipelining

View File

@@ -1,71 +1,74 @@
---
title: Protocol Buffers
weight: 2
---
Interface definition languages, such as protobufs, are most commonly used to
store and transmit data between applications.
They define a way to *serialize* structures as text, as well as to *deserialize*
them again.
Here's an example of a protobuf message in *text format*:
```protobuf
KvPair {
  key: "TiKV"
  value: "Astronaut"
}
```
When a message is actually *sent* between two applications, a binary format is
used. You can learn more about the binary format in the [protobuf documentation](https://developers.google.com/protocol-buffers/docs/encoding).
The message above is an instance of a structure predefined in a `.proto` file:
```protobuf
message KvPair {
  string key = 1;
  string value = 2;
}
```
The fields are numbered to support backwards compatibility and field renaming.
This makes it possible to evolve your application's communication in a
compatible way.
## Why Protobufs
Protobufs are simply much faster and more efficient than things like JSON.
Additionally, protobufs can generate all the required code for your desired
language.
If you have used [`serde_json`](https://docs.serde.rs/serde_json/) or another
JSON library, you may have experienced the task of defining schemas for
structures. This becomes a maintenance burden as your infrastructure grows to
span many languages.
You need to do this with protocol buffers as well, but you only do it once, and
the protobuf compiler will generate bindings for any language it supports.
Protobuf generates code in a backwards-compatible manner. If an application
encounters fields it isn't familiar with, it simply ignores them. This allows
for safe evolution of an API.
## More than just data
Protobufs also enable the definition of *services*. This allows the
definition of RPC calls in the `*.proto` files.
This example demonstrates a service called `ScanService`. It provides a remote
procedure call `Scan` that accepts two strings and returns a stream of `KvPair`s:
```protobuf
service ScanService {
  // `ScanRequest` stands in for a message wrapping the two strings;
  // a streamed result uses `stream` (`repeated` is not valid here).
  rpc Scan (ScanRequest) returns (stream KvPair);
}
```
This is particularly useful as it allows users to call remote functions almost
as if they were local thanks to code generation.
---
title: Protocol Buffers
menu:
  docs:
    parent: Remote Procedure Calls (RPC)
    weight: 2
---
Interface definition languages, such as protobufs, are most commonly used to
store and transmit data between applications.
They define a way to *serialize* structures as text, as well as to *deserialize*
them again.
Here's an example of a protobuf message in *text format*:
```protobuf
KvPair {
  key: "TiKV"
  value: "Astronaut"
}
```
When a message is actually *sent* between two applications, a binary format is
used. You can learn more about the binary format in the [protobuf documentation](https://developers.google.com/protocol-buffers/docs/encoding).
The message above is an instance of a structure predefined in a `.proto` file:
```protobuf
message KvPair {
  string key = 1;
  string value = 2;
}
```
The fields are numbered to support backwards compatibility and field renaming.
This makes it possible to evolve your application's communication in a
compatible way.
## Why Protobufs
Protobufs are simply much faster and more efficient than things like JSON.
Additionally, protobufs can generate all the required code for your desired
language.
If you have used [`serde_json`](https://docs.serde.rs/serde_json/) or another
JSON library, you may have experienced the task of defining schemas for
structures. This becomes a maintenance burden as your infrastructure grows to
span many languages.
You need to do this with protocol buffers as well, but you only do it once, and
the protobuf compiler will generate bindings for any language it supports.
Protobuf generates code in a backwards-compatible manner. If an application
encounters fields it isn't familiar with, it simply ignores them. This allows
for safe evolution of an API.
## More than just data
Protobufs also enable the definition of *services*. This allows the
definition of RPC calls in the `*.proto` files.
This example demonstrates a service called `ScanService`. It provides a remote
procedure call `Scan` that accepts two strings and returns a stream of `KvPair`s:
```protobuf
service ScanService {
  // `ScanRequest` stands in for a message wrapping the two strings;
  // a streamed result uses `stream` (`repeated` is not valid here).
  rpc Scan (ScanRequest) returns (stream KvPair);
}
```
This is particularly useful as it allows users to call remote functions almost
as if they were local thanks to code generation.
Next, we'll use gRPC to provide these services.

View File

@@ -1,6 +1,9 @@
---
title: Data Sharding
weight: 2
menu:
  docs:
    parent: Scalability
    weight: 2
---
## What is a partition?

View File

@@ -1,6 +1,9 @@
---
title: Horizontal or Vertical
weight: 1
menu:
  docs:
    parent: Scalability
    weight: 1
---
Methods of adding more resources for a particular application fall into two broad categories: horizontal and vertical scaling.

View File

@@ -1,6 +1,9 @@
---
title: Scalability
weight: 5
menu:
  docs:
    parent: Deep Dive
    weight: 5
---
In the database field, scalability is the term we use to describe the capability of a system to handle a growing amount of work. Even if a system is working reliably and fast today, it doesn't mean it will necessarily work well in the future. One common reason for degradation is increased load that exceeds what the system can process. In modern systems, the amount of data we handle can far outgrow our original expectations, so scalability is a critical consideration for the design of a database.

View File

@@ -1,7 +1,9 @@
---
title: Multi-raft
mathjax: true
weight: 3
menu:
  docs:
    parent: Scalability
    weight: 3
---
If you've researched consensus before, please note that comparing Multi-Raft to Raft is not at all like comparing Multi-Paxos to Paxos. Here Multi-Raft only means that we manage multiple Raft consensus groups on one node. From the above section, we know that each node holds multiple different partitions; if there were only one Raft group per node, partitioning would lose its meaning. So the data is divided among multiple Raft groups by partition, with each partition called a Region.

View File

@@ -0,0 +1,8 @@
---
title: Failure Injection
menu:
  docs:
    parent: Testing
    weight: 1
---

View File

@@ -0,0 +1,72 @@
---
title: Testing
menu:
  docs:
    parent: Deep Dive
    weight: 9
---
Testing is a crucial part of any large software project. A distributed system like TiKV is designed to exist and function under many failure scenarios and degraded states, which dramatically increases the testing surface: these states must be tested in addition to the normal testing done on a project.
In further sections we'll investigate how we can test distributed systems even with these nearly infinite variables affecting our system state. For now, let's investigate the basics required for almost every project. These simple tests form the foundation for many later tests, and can be run millions of times over the life of a project.
Often, these tests are written before or alongside the code which they test, and they're used to guide the development process.
## Unit testing
TiKV includes many unit tests that are run using `cargo test`. These tests typically involve simple functionality and are tested using assertions.
These tests are most useful for testing expected errors (e.g. trying to open a config file that doesn't exist), and for ensuring operations succeed given the correct initial state. These kinds of tests are also very helpful in testing for regressions of bugs that other testing methods have found.
Unit tests are notably able to test project internals. Unlike other testing methods, which exercise the project as a *consumer* would, unit tests allow you to make assertions about internal state normally hidden from consumers of your project.
Many languages, like Rust, offer built in unit testing functionality:
```rust
#[test]
fn can_initialize_server() -> Result<(), Error> {
    let server = Server::new(Config::default())?;
    assert_eq!(server.private_field, true);
    Ok(())
}
```
One potential danger of unit tests is that it's very easy to accidentally modify private fields, or call private functionality that a dependent project would not be able to use. When testing functionality covering more than a few functions, or more than one module, integration tests are better suited.
## Integration testing
Projects like TiKV are used as components in larger infrastructure. When doing more complete, functional testing, integration tests offer an easy way to test public functionality of a project.
In Rust, integration tests can also exist as documentation. This means it's possible to document your project and benefit from a full test suite at the same time.
```rust
/// Create a server with the given config
///
/// ```rust
/// let server = Server::new(Config::default()).unwrap();
/// ```
struct Server { /* ... */ }
```
These kinds of tests are useful for ensuring that a consumer of the project will be able to use it correctly. Having a documentation test suite that demonstrates how a consumer is expected to use the project is especially useful for detecting functionality, API, or compatibility breakage. These kinds of tests also eliminate most trivial unit tests, and they are worthwhile to write, since they will be readily used by people learning the project.
While documentation-based integration tests cover *usability* and *functionality*, they're not always suited for testing corner cases or workloads. Rust's built-in integration tests are best suited to this task.
The [Rust Book](https://doc.rust-lang.org/book/ch11-01-writing-tests.html) has a great chapter on how to write tests in Rust, and which testing strategies are appropriate for which problems.
## Going further
Unit tests and integration tests cover the basics, but even with a comprehensive test suite there can be cases neglected, forgotten, or not even realized possible.
Here's just a few of the situations that can happen:
* A node disappears and is replaced by a new node at the same IP
* Messages between one node and another are simply lost
* The network partitions a cluster into two or more groups
* A network link becomes overloaded, and messages start to queue and eventually fail
* An expected service dependency suddenly disappears
* Thread scheduling and poor memory management lead to data integrity issues
Even this small list offers a nearly endless testing surface. It's not practical to test every possibility. Worse, many of these cases are very difficult to set up in a test.
In the rest of this chapter, we'll investigate how we can overcome this problem by using tools to inject a variety of failures and introduce chaos into networks, I/O, and other parts of the system under test.
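As a taste of what's ahead, here is a minimal failpoint sketch using the [`fail`](https://crates.io/crates/fail) crate, which TiKV uses for failure injection; the failpoint name and the function are invented for this example, and the crate's `failpoints` feature must be enabled:
```rust
use fail::fail_point;

fn apply_snapshot() {
    // An inert marker compiled into the code; it does nothing unless a
    // test configures an action for it.
    fail_point!("apply_snapshot::before");
    // ... normal snapshot-application logic ...
}

#[test]
fn crash_during_snapshot_is_handled() {
    let scenario = fail::FailScenario::setup();
    // Make the failpoint panic, simulating a crash at exactly this step.
    fail::cfg("apply_snapshot::before", "panic").unwrap();
    assert!(std::panic::catch_unwind(apply_snapshot).is_err());
    scenario.teardown();
}
```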

View File

@@ -1,6 +0,0 @@
---
title: Reference
headless: true
draft: true
weight: 3
---

View File

@@ -1,5 +0,0 @@
---
title: Tools
draft: true
weight: 1
---

View File

@ -1,5 +0,0 @@
---
title: Tasks
headless: true
weight: 2
---

View File

@ -1,25 +0,0 @@
{{ define "title" }}
TiKV deep dive | {{ .Title }}
{{ end }}
{{ define "main" }}
<section class="section">
<div class="container">
<div class="columns">
<div class="column is-narrow">
{{ partial "deep-dive/toc.html" . }}
</div>
<div class="column">
<p class="title is-size-1 is-size-2-mobile has-text-weight-light">
{{ .Title }}
</p>
<div class="content is-medium docs-content">
{{ .Content }}
</div>
</div>
</div>
</div>
</section>
{{ end }}

View File

@ -1,26 +0,0 @@
{{ define "title" }}
TiKV deep dive | {{ .Title }}
{{ end }}
{{ define "main" }}
<section class="section">
<div class="container">
<div class="columns">
<div class="column is-3">
{{ partial "deep-dive/toc.html" . }}
</div>
<div class="column">
<p class="title is-size-1 is-size-2-mobile has-text-weight-light">
{{ .Title }}
</p>
<div class="content is-medium docs-content">
{{ partial "math.html" . }}
{{ .Content }}
</div>
</div>
</div>
</div>
</section>
{{ end }}

View File

@ -4,17 +4,27 @@ TiKV | {{ .Title }}
{{ define "main" }}
{{ partial "docs/hero.html" . }}
{{ $docs := .Site.Menus.docs }}
{{ $currentPage := (index .Pages 0) }}
<section class="section">
<div class="container">
<div class="columns">
<div class="column is-3">
{{ partial "docs/section-toc.html" . }}
<div class="toc">
{{ range .Site.Menus.docs }}
{{ $submenu := (index (where $docs "Name" .Name) 0) }}
{{ if (or ($currentPage.HasMenuCurrent "docs" $submenu) ($currentPage.IsMenuCurrent "docs" $submenu)) }}
{{ partial "entry-tree.html" (dict "entries" .Children "currentPage" $currentPage ) }}
{{ end }}
{{ end }}
</div>
</div>
<div class="column">
<div class="content is-medium docs-content">
{{ partial "docs/version-warning.html" . }}
{{ partial "math.html" . }}
{{ .Content }}
</div>

View File

@ -4,17 +4,27 @@ TiKV | {{ .Title }}
{{ define "main" }}
{{ partial "docs/hero.html" . }}
{{ $docs := .Site.Menus.docs }}
{{ $currentPage := . }}
<section class="section">
<div class="container">
<div class="columns">
<div class="column is-3">
{{ partial "docs/section-toc.html" . }}
<div class="column is-narrow">
<div class="toc">
{{ range .Site.Menus.docs }}
{{ $submenu := (index (where $docs "Name" .Name) 0) }}
{{ if (or ($currentPage.HasMenuCurrent "docs" $submenu) ($currentPage.IsMenuCurrent "docs" $submenu)) }}
{{ partial "entry-tree.html" (dict "entries" .Children "currentPage" $currentPage ) }}
{{ end }}
{{ end }}
</div>
</div>
<div class="column">
<div class="content is-medium docs-content">
{{ partial "docs/version-warning.html" . }}
{{ partial "math.html" . }}
{{ .Content }}
</div>

View File

@ -1,46 +0,0 @@
{{ $deepDive := where site.Sections "Section" "deep-dive" }}
{{ $currentUrl := .RelPermalink }}
<div class="deep-dive-toc">
<p class="title is-narrow has-text-weight-bold">
TiKV deep dive
</p>
<hr class="has-background-primary" />
<ul>
{{ range $deepDive }}
{{ $isCurrentPage := eq $currentUrl .RelPermalink }}
<li{{ if $isCurrentPage }} class="is-active"{{ end }}>
<strong>{{ .Weight }}</strong>.
<a href="{{ .RelPermalink }}">
{{ .Title }}
</a>
</li>
{{ range .Sections }}
{{ $isCurrentPage := eq $currentUrl .RelPermalink }}
{{ $sectionWeight := .Weight }}
<li{{ if $isCurrentPage }} class="is-active"{{ end }}>
<strong>{{ $sectionWeight }}</strong>.
<a href="{{ .RelPermalink }}">
{{ .Title }}
</a>
</li>
<li>
<ul>
{{ range .Pages }}
{{ $isCurrentPage := eq $currentUrl .RelPermalink }}
<li{{ if $isCurrentPage }} class="is-active"{{ end }}>
<strong>{{ $sectionWeight }}.{{ .Weight }}</strong>.
<a href="{{ .RelPermalink }}">
{{ .Title }}
</a>
</li>
{{ end }}
</ul>
</li>
{{ end }}
{{ end }}
</ul>
</div>

View File

@ -31,16 +31,13 @@
<nav class="tabs is-medium is-boxed">
<div class="container">
<ul>
{{ range $docsSections }}
{{ range .Sections }}
{{ $mainPage := index (where .Pages "Weight" 1) 0 }}
{{ $isCurrentSection := eq .Name $currentSection }}
<li{{ if $isCurrentSection }} class="is-active"{{ end }}>
<a href="{{ $mainPage.RelPermalink }}">
{{ .Name }}
</a>
</li>
{{ end }}
{{ $currentPage := . }}
{{ range .Site.Menus.docs }}
<li {{ if (or ($currentPage.IsMenuCurrent "docs" .) ($currentPage.HasMenuCurrent "docs" .)) }}class="is-active"{{ end }}>
<a href="{{ .URL }}">
{{ .Name }}
</a>
</li>
{{ end }}
</ul>
</div>

View File

@ -1,15 +0,0 @@
{{ $allDocs := where site.RegularPages "Section" "docs" }}
{{ $thisSection := .CurrentSection.Name }}
{{ $currentUrl := .RelPermalink }}
<div class="section-toc">
<ul>
{{ range where $allDocs "CurrentSection.Name" $thisSection }}
{{ $isCurrentPage := eq .RelPermalink $currentUrl }}
<li{{ if $isCurrentPage }} class="is-active"{{ end }}>
<a href="{{ .RelPermalink }}">
{{ .Title }}
</a>
</li>
{{ end }}
</ul>
</div>

View File

@ -0,0 +1,57 @@
{{ $entries := .entries }}
{{ $currentPage := .currentPage }}
<ul>
{{ range $entries }}
{{ if .HasChildren }}
<li>
<a {{ if (or ($currentPage.IsMenuCurrent .Menu .) ($currentPage.HasMenuCurrent .Menu .)) }}class="is-active"{{ end }} href="{{ .URL }}">
{{ .Name }}
</a>
<ul>
{{ $parent := . }}
{{ range .Children }}
{{ if .HasChildren }}
<li>
<a
{{ if (or ($currentPage.IsMenuCurrent .Menu .) ($currentPage.HasMenuCurrent .Menu .)) }}class="is-active"{{ end }}
href="{{ .URL }}">
{{ .Name }}
</a>
<ul>
{{ $parent = . }}
{{ range .Children }}
<li>
<a
{{ if $currentPage.IsMenuCurrent .Menu . }}class="is-active"{{ end }}
href="{{ .URL }}">
{{ .Name }}
</a>
</li>
{{ end }}
</ul>
</li>
{{ else }}
<li>
<a
{{ if $currentPage.IsMenuCurrent .Menu . }}class="is-active"{{ end }}
href="{{ .URL }}">
{{ .Name }}
</a>
</li>
{{ end }}
{{ end }}
</ul>
</li>
{{ else }}
<li>
<a
{{ if $currentPage.IsMenuCurrent .Menu . }}class="is-active"{{ end }}
href="{{ .URL }}">
<span>{{ .Name }}</span>
</a>
</li>
{{ end }}
{{ end }}
</ul>

View File

@ -18,13 +18,26 @@
<hr class="hr has-background-primary">
<ul>
{{ range $docs }}
<li>
<a href="{{ .RelPermalink }}">
{{ .Title }}
<a href="/docs/{{ $latest }}/">
Overview
</a>
</li>
<li>
<a href="/docs/{{ $latest }}/concepts">
Concepts
</a>
</li>
<li>
<a href="/docs/{{ $latest }}/tasks">
Tasks
</a>
</li>
<li>
<a href="/docs/{{ $latest }}/reference">
Reference
</a>
</li>
{{ end }}
</ul>
</div>

View File

@ -5,7 +5,6 @@
{{ $navbarLogo := cond $isHome $blackLogo $whiteLogo }}
{{ $twitter := site.Params.socialmedia.twitter }}
{{ $github := site.Params.socialmedia.github }}
{{ $docs := where site.RegularPages "Section" "docs" }}
{{ $latest := site.Params.versions.latest }}
{{ $color := cond $isHome "light" "black" }}
{{ $blogPosts := where site.RegularPages "Section" "blog" }}
@ -40,60 +39,23 @@
</div>
<div class="navbar-end">
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link" href="/docs">
Docs
</a>
<div class="navbar-dropdown is-hidden-touch">
{{ range $docs }}
<a class="navbar-item" href="{{ .RelPermalink }}">
{{ .Title }}
</a>
{{ end }} <!-- range $docs -->
</div>
</div>
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link" href="/deep-dive/">
Deep dive
</a>
<div class="navbar-dropdown is-hidden-touch">
{{ range $deepDive }}
<a class="navbar-item" href="{{ .RelPermalink }}">
<strong>{{ .Weight }}</strong>. {{ .Title }}
{{ range .Site.Menus.nav }}
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link" href="{{ .URL }}">
{{ .Name }}
</a>
{{ range .Sections }}
<a class="navbar-item" href="{{ .RelPermalink }}">
<strong>{{ .Weight }}</strong>. {{ .Title }}
</a>
{{ end }}
{{ if .HasChildren }}
<div class="navbar-dropdown is-hidden-touch">
{{ range .Children }}
<a class="navbar-item" href="{{ .URL }}">
{{ .Name }}
</a>
{{ end }}
</div>
{{ end }}
</div>
</div>
<div class="navbar-item has-dropdown is-hoverable">
<div class="navbar-link">
Community
</div>
<div class="navbar-dropdown">
<a class="navbar-item" href="https://forum.tikv.org" target="_blank">
Forum
</a>
<a class="navbar-item" href="/chat" target="_blank">
Chat
</a>
<a class="navbar-item" href="/adopters">
Adopters
</a>
<a class="navbar-item" href="https://branding.cncf.io/projects/tikv" target="_blank">
Branding
</a>
</div>
</div>
{{ end }}
<div class="navbar-item has-dropdown is-hoverable">
<a class="navbar-link" href="/blog">