mirror of https://github.com/tikv/website.git
commit 6c90ecce9b

Makefile
@@ -2,7 +2,13 @@ serve:
	hugo server \
		--buildDrafts \
		--buildFuture \
		--disableFastRender \
		--bind 0.0.0.0

serve-production:
	hugo server \
		--disableFastRender \
		--bind 0.0.0.0

production-build:
	hugo --minify

@@ -12,4 +18,4 @@ preview-build:
		--buildDrafts \
		--buildFuture \
		--baseURL $(DEPLOY_PRIME_URL) \
		--minify
@@ -68,19 +68,18 @@ $colors: mergeColorMaps(("twitter-blue": ($twitter-blue, $white)), $colors)

.docs-content
  padding-bottom: 8rem

.deep-dive-toc
  padding-bottom: 2rem
  margin-right: 1rem

  .toc
    padding: 1rem 0 3rem 2rem

    li
      font-size: 1.1rem

      a.is-active
        color: $tikv-blue
        font-weight: 700

    ul
      ul
        list-style: square !important
        ul
          margin-left: 1rem
config.toml → config.yaml
@@ -1,47 +0,0 @@
title = "TiKV"
baseURL = "https://tikv.org"
languageCode = "en-us"
pygmentsCodeFences = true
pygmentsUseClasses = true
disableKinds = ["taxonomy", "taxonomyTerm"]

[params]
favicon = "favicon.png"
googleAnalyticsId = "UA-130734531-1"

[params.versions]
latest = "3.0"

[params.description]
brief = "A distributed transactional key-value database"
long = "Based on the design of [Google Spanner](https://ai.google/research/pubs/pub39966) and [HBase](https://hbase.apache.org), but simpler to manage and without dependencies on any distributed filesystem"

[params.fonts]
sansserif = "Titillium Web"
monospace = "Inconsolata"

[params.logos]
white = "img/logos/horizontal/white/tikv-horizontal-white.png"
color = "img/logos/horizontal/color/tikv-horizontal-color.png"
black = "img/logos/horizontal/black/tikv-horizontal-black.png"
cncf = "img/logos/cncf-color.png"
card = "img/logos/card.png"

[params.assets]
fontAwesomeVersion = "5.3.1"
js = ["jquery-3.3.1", "anchor", "app"]
css = ["syntax"]

[params.socialmedia]
twitter = "tikvproject"
github = "https://github.com/tikv/tikv"

[outputs]
home = ["HTML", "REDIRECTS"]

[outputFormats.REDIRECTS]
mediaType = "text/netlify"
baseName = "_redirects"

[mediaTypes."text/netlify"]
delimiter = ""
@@ -0,0 +1,61 @@
title: "TiKV"
baseURL: "https://tikv.org"
languageCode: "en-us"
pygmentsCodeFences: true
pygmentsUseClasses: true
disableKinds: ["taxonomy", "taxonomyTerm"]

menu:
  nav:
    - name: Forum
      url: https://forum.tikv.org
      parent: Community
      weight: 1
    - name: Chat
      url: /chat
      parent: Community
      weight: 2
    - name: Branding
      url: https://branding.cncf.io/projects/tikv
      parent: Community
      weight: 4
    - name: Docs
      url: /docs/3.0/concepts
      weight: 1

params:
  favicon: "favicon.png"
  googleAnalyticsId: "UA-130734531-1"
  versions:
    latest: "3.0"
  description:
    brief: "A distributed transactional key-value database"
    long: "Based on the design of [Google Spanner](https://ai.google/research/pubs/pub39966) and [HBase](https://hbase.apache.org), but simpler to manage and without dependencies on any distributed filesystem"
  fonts:
    sansserif: "Titillium Web"
    monospace: "Inconsolata"
  logos:
    white: "img/logos/horizontal/white/tikv-horizontal-white.png"
    color: "img/logos/horizontal/color/tikv-horizontal-color.png"
    black: "img/logos/horizontal/black/tikv-horizontal-black.png"
    cncf: "img/logos/cncf-color.png"
    card: "img/logos/card.png"
  assets:
    fontAwesomeVersion: "5.3.1"
    js: ["jquery-3.3.1", "anchor", "app"]
    css: ["syntax"]
  socialmedia:
    twitter: "tikvproject"
    github: "https://github.com/tikv/tikv"

outputs:
  home: ["HTML", "REDIRECTS"]

outputFormats:
  REDIRECTS:
    mediaType: "text/netlify"
    baseName: "_redirects"

mediaTypes:
  "text/netlify":
    delimiter: ""
@@ -1,5 +1,9 @@
---
title: Adopters
menu:
  nav:
    parent: Community
    weight: 3
---

TiKV has been adopted by many companies across a wide range of industries. The table below lists many of them:
@@ -1,9 +1,14 @@
---
title: TiKV overview
description: Some basic facts about TiKV
weight: 1
aliases:
  - /docs
menu:
  nav:
    name: Concepts
    parent: Docs
    weight: 1
  docs:
    name: Concepts
    weight: 1
---

**TiKV** is a distributed transactional key-value database originally created by [PingCAP](https://pingcap.com/en) to complement [TiDB](https://github.com/pingcap/tidb).
@@ -1,9 +1,9 @@
---
title: APIs
description: Interact with TiKV using the raw key-value API or the transactional key-value API
weight: 4
aliases:
  - /docs/apis
menu:
  docs:
    parent: Concepts
---

TiKV offers two APIs that you can interact with:
@@ -1,9 +1,9 @@
---
title: Architecture
description: How TiKV works and how it was built
weight: 2
aliases:
  - /docs/architecture
menu:
  docs:
    parent: Concepts
---

This page discusses the core concepts and architecture behind TiKV, including:
@@ -0,0 +1,11 @@
---
title: Reference
headless: true
draft: true
menu:
  nav:
    parent: Docs
    weight: 3
  reference:
    weight: 4
---
@@ -1,5 +1,4 @@
---
title: Clients
draft: true
weight: 2
---
@@ -0,0 +1,7 @@
---
title: Tools
draft: true
menu:
  docs:
    parent: Reference
---
@@ -1,7 +1,14 @@
---
title: Quickstart
description: Run TiKV in your local environment using Docker Compose
weight: 1
menu:
  nav:
    name: Tasks
    parent: Docs
    weight: 2
  docs:
    name: Tasks
    weight: 2
---

This guide describes how to quickly deploy a TiKV testing cluster using [Docker Compose](https://docs.docker.com/compose/) on a single machine. Currently, this installation method is supported only on Linux.
@@ -1,7 +1,9 @@
---
title: Configure TiKV
description: Configure a wide range of TiKV facets, including RocksDB, gRPC, the Placement Driver, and more
weight: 3
menu:
  docs:
    parent: Tasks
---

## RocksDB configuration {#rocksdb}
@@ -1,8 +1,10 @@
---
title: Install and deploy
description: Run TiKV using Ansible or Docker
weight: 2
draft: true
menu:
  docs:
    parent: Tasks
---

This document tells you how to install TiKV using:
@@ -1,8 +1,8 @@
---
title: Secure TiKV
weight: 4
aliases:
  - /docs/security
menu:
  docs:
    parent: Tasks
---

This page discusses how to secure your TiKV deployment. Learn how to:
@@ -1,5 +0,0 @@
---
title: Concepts
headless: true
weight: 1
---
@@ -1,6 +1,9 @@
---
title: Byzantine Failure
weight: 2
menu:
  docs:
    parent: Consensus algorithm
    weight: 2
---

Consensus algorithms are typically either *Byzantine Fault Tolerant* or not. Succinctly, systems that can withstand Byzantine faults are able to withstand misbehaving peers. Most distributed systems you would use inside a VLAN, such as Kafka, TiKV, and etcd, are not Byzantine Fault Tolerant.
@@ -1,6 +1,9 @@
---
title: CAP Theorem
weight: 1
menu:
  docs:
    parent: Consensus algorithm
    weight: 1
---

In 2000, Eric Brewer presented [“Towards Robust Distributed Systems”](http://awoc.wolski.fi/dlib/big-data/Brewer_podc_keynote_2000.pdf), which detailed the CAP Theorem. Succinctly, the theorem declares that a distributed system may only choose two of the following three attributes:
@@ -1,6 +1,9 @@
---
title: Consensus algorithm
weight: 2
menu:
  docs:
    parent: Deep Dive
    weight: 2
---

When building a distributed system, one principal goal is often to build in fault tolerance. That is, if one particular node in a network goes down, or if there is a network partition, the system continues to operate. The cluster of nodes taking part in a distributed consensus protocol must come to agreement regarding values, and once that decision is reached, that choice is final, even if some nodes were in a faulty state at the time.
@@ -1,6 +1,9 @@
---
title: Paxos
weight: 3
menu:
  docs:
    parent: Consensus algorithm
    weight: 3
---

Paxos is a protocol that Leslie Lamport and others have written extensively about. The most succinct paper describing Paxos is ["Paxos Made Simple"](https://lamport.azurewebsites.net/pubs/paxos-simple.pdf), published by Lamport in 2001. The original paper, ["The Part-Time Parliament"](http://lamport.azurewebsites.net/pubs/pubs.html#lamport-paxos), was published in 1989.
@@ -1,6 +1,9 @@
---
title: Raft
weight: 4
menu:
  docs:
    parent: Consensus algorithm
    weight: 4
---

In 2014, Diego Ongaro and John Ousterhout presented the Raft algorithm. It is explained succinctly in a [paper](https://raft.github.io/raft.pdf) and detailed at length in a [thesis](https://ramcloud.stanford.edu/~ongaro/thesis.pdf).
@@ -1,6 +1,9 @@
---
title: Distributed SQL
weight: 1
menu:
  docs:
    parent: Distributed SQL over TiKV
    weight: 2
---

By now we know how [TiDB]'s relational structure is encoded into versioned key-value form. In this section, we will focus on the following questions:
@@ -1,6 +1,9 @@
---
title: Distributed SQL over TiKV
weight: 8
menu:
  docs:
    parent: Deep Dive
    weight: 8
---

TiKV is the storage layer for [TiDB], a distributed HTAP SQL database. So far,
@@ -1,6 +1,9 @@
---
title: Distributed Algorithms
weight: 2
menu:
  docs:
    parent: Distributed transaction
    weight: 2
---

## Two-Phase Commit
@@ -1,6 +1,9 @@
---
title: Distributed transaction
weight: 4
menu:
  docs:
    parent: Deep Dive
    weight: 4
---

As TiKV is a distributed transactional key-value database, transactions are a core feature of TiKV. In this chapter, we will talk about general implementations of distributed transactions and some implementation details in TiKV.
@@ -1,6 +1,9 @@
---
title: Isolation Level
weight: 1
menu:
  docs:
    parent: Distributed transaction
    weight: 1
---

Isolation is one of the ACID (Atomicity, Consistency, Isolation, Durability) properties. It determines how and when the changes made by one transaction become visible to other users and systems. For example, when a user is creating a Purchase Order and has created the header but not the Purchase Order lines, is the header available for other systems/users (carrying out concurrent operations, such as a report on Purchase Orders) to see?
@@ -1,6 +1,9 @@
---
title: Locking
weight: 3
menu:
  docs:
    parent: Distributed transaction
    weight: 3
---

To prevent lost updates and dirty reads, locking is employed to manage the actions of multiple concurrent users on a database. The two types of locking are pessimistic locking and optimistic locking.
@@ -1,81 +1,84 @@
---
title: Optimized Percolator
menu:
  docs:
    parent: Distributed transaction
    weight: 6
---

As described in the [previous chapter](../percolator), TiKV makes use of Percolator's transaction algorithm. TiKV's implementation adds several optimizations on top of Percolator, which we introduce in this chapter.

## Parallel Prewrite

In practice, for a single transaction, we don't want to do prewrites one by one. When there are dozens of TiKV nodes in the cluster, we want the prewrite to be executed concurrently on these nodes.

In TiKV's implementation, when committing a transaction, the keys in the transaction are divided into several batches and each batch is prewritten in parallel. It doesn't matter whether the primary key is written first.
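This batching can be pictured with a short sketch. It is not TiKV's actual code: `prewrite_batch`, the plain threads, and the error type are illustrative stand-ins for the real prewrite RPCs sent to TiKV nodes.

```rust
use std::thread;

// Stand-in for the real prewrite RPC sent to the TiKV node owning `batch`.
fn prewrite_batch(batch: Vec<Vec<u8>>) -> Result<(), String> {
    println!("prewriting a batch of {} keys", batch.len());
    Ok(())
}

// Divide the transaction's keys into batches and prewrite them in parallel.
fn parallel_prewrite(keys: Vec<Vec<u8>>, batch_size: usize) -> Result<(), String> {
    let handles: Vec<_> = keys
        .chunks(batch_size)
        .map(|chunk| {
            let batch = chunk.to_vec();
            thread::spawn(move || prewrite_batch(batch))
        })
        .collect();
    // If any batch fails or conflicts, the caller must roll back every
    // affected key, as described below.
    for handle in handles {
        handle.join().map_err(|_| "prewrite thread panicked".to_string())??;
    }
    Ok(())
}
```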
If a conflict happens during a transaction's prewrite phase, the prewrite process is canceled and a rollback is performed on all keys affected by the transaction. Rolling back a key leaves a `Rollback` record in `CF_WRITE` (Percolator's `write` column), which is not described in Google's Percolator paper. The `Rollback` record is a mark indicating that the transaction with the `start_ts` in the record has been rolled back, so if a prewrite request arrives later than the rollback request, the prewrite will not succeed. This situation may be caused by network issues. Correctness wouldn't be broken if we allowed the prewrite to succeed; however, the key would be locked and unavailable until the lock's TTL expired.

## Short Value in Write Column

As mentioned in [Percolator in TiKV](../percolator/#percolator-in-tikv), TiKV uses RocksDB's column families to save Percolator's different columns. Different column families of RocksDB are actually different LSM-trees. When we access a value, we first need to search `CF_WRITE` to find the `start_ts` of the matching record, and then search `CF_DEFAULT` for the corresponding record. If a value is very small, searching RocksDB twice is wasteful.

The optimization in TiKV is to avoid handling `CF_DEFAULT` for short values. If a value is short enough, it is not put into `CF_DEFAULT` during the prewrite phase. Instead, it is embedded in the lock and saved in `CF_LOCK`. Then, in the commit phase, the value is moved out of the lock and inlined in the write record. Therefore, we can access and manipulate short values without having to touch `CF_DEFAULT`.
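The placement decision can be sketched like this; the types and the 64-byte threshold are our assumptions for illustration, not TiKV's actual definitions.

```rust
// Assumed threshold for what counts as a "short" value (illustrative).
const SHORT_VALUE_MAX_LEN: usize = 64;

// Where a prewritten value lives.
enum ValuePlace {
    // Short values are inlined into the lock (and later the write record),
    // so reads never have to touch CF_DEFAULT.
    Inline(Vec<u8>),
    // Long values go into CF_DEFAULT, keyed by (user_key, start_ts).
    DefaultCf { start_ts: u64 },
}

fn place_value(start_ts: u64, value: Vec<u8>) -> ValuePlace {
    if value.len() <= SHORT_VALUE_MAX_LEN {
        ValuePlace::Inline(value)
    } else {
        ValuePlace::DefaultCf { start_ts }
    }
}
```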
## Point Read Without Timestamp

Timestamps are critical to providing isolation for transactions. For every transaction, we allocate a unique `start_ts`, and ensure that transaction T can only see the data committed before T's `start_ts`.

But if transaction T does nothing but read a single key, is it really necessary to allocate it a `start_ts`? The answer is no. We can simply read the newest version directly, because this is equivalent to reading with a `start_ts` that is exactly the instant when the key is read. It's even OK to read a locked key, because this is equivalent to reading with a `start_ts` allocated before the lock's `start_ts`.

## Calculated Commit Timestamp

{{< warning >}}
This optimization hasn't been finished yet, but will be available in the future. [RFC](https://github.com/tikv/rfcs/pull/25).
{{</ warning >}}

To provide Snapshot Isolation, we must ensure all transactional reads are repeatable. The `commit_ts` should be large enough that the transaction cannot be committed before a valid read. Otherwise, Repeatable Read would be broken. For example:

1. Txn1 gets `start_ts` 100
2. Txn2 gets `start_ts` 200
3. Txn2 reads key `"k1"` and gets value `"1"`
4. Txn1 prewrites `"k1"` with value `"2"`
5. Txn1 commits with `commit_ts` 101
6. Txn2 reads key `"k1"` and gets value `"2"`

Txn2 reads `"k1"` twice but gets two different results. If `commit_ts` is allocated from PD, this cannot happen, because Txn2's first read must happen before Txn1's prewrite, while Txn1's `commit_ts` must be requested after finishing the prewrite. As a result, Txn1's `commit_ts` must be larger than Txn2's `start_ts`.

On the other hand, `commit_ts` can't be arbitrarily large. If the `commit_ts` is ahead of the actual time, the committed data may be unreadable by other new transactions, which breaks integrity. We are not sure whether a timestamp is ahead of the actual time if we don't ask PD.

To conclude, in order not to break Snapshot Isolation or integrity, a valid range for `commit_ts` is:

```text
max{start_ts, max_read_ts_of_written_keys} < commit_ts <= now
```

So here comes a method to calculate the `commit_ts`:

```text
commit_ts = max{start_ts, region_1_max_read_ts, region_2_max_read_ts, ...} + 1
```

where `region_N_max_read_ts` is the maximum timestamp of all reads on region N, for all regions involved in the transaction.
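As a minimal sketch of this rule, using plain `u64` timestamps (the function name is ours, not TiKV's):

```rust
/// commit_ts = max{start_ts, region_1_max_read_ts, ...} + 1
fn calculated_commit_ts(start_ts: u64, region_max_read_ts: &[u64]) -> u64 {
    region_max_read_ts.iter().copied().fold(start_ts, u64::max) + 1
}

fn main() {
    // A transaction with start_ts 100 writing to two regions whose maximum
    // observed read timestamps are 105 and 98 gets commit_ts 106.
    assert_eq!(calculated_commit_ts(100, &[105, 98]), 106);
}
```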
## Single Region 1PC

{{< warning >}}
This optimization hasn't been finished yet, but will be available in the future.
{{</ warning >}}

For non-distributed databases, it's easy to provide ACID transactions, but for distributed databases, 2PC (two-phase commit) is usually required to make transactions ACID. Percolator provides such a 2PC algorithm, which is adopted by TiKV.

Considering that write batches are applied atomically within a single Region, we arrive at the realization that if a transaction affects only one Region, 2PC is actually unnecessary. As long as there is no write conflict, the transaction can be committed directly. Based on [the previous optimization](#calculated-commit-timestamp), the `commit_ts` can be set directly to the `max_read_ts` of the Region. In this way, we save an RPC round trip and a write operation (including a Raft commit and a RocksDB write) in TiKV for single-region transactions.
@@ -1,6 +1,9 @@
---
title: Percolator
weight: 5
menu:
  docs:
    parent: Distributed transaction
    weight: 5
---

TiKV's support for distributed transactions is inspired by Google's [Percolator](https://ai.google/research/pubs/pub36726.pdf). In this section, we will briefly introduce Percolator and how we make use of it in TiKV.
@@ -1,6 +1,9 @@
---
title: Timestamp Oracle
weight: 4
menu:
  docs:
    parent: Distributed transaction
    weight: 4
---

The timestamp oracle plays a significant role in the Percolator transaction model. It is a server that hands out timestamps in strictly increasing order, a property required for the correct operation of the snapshot isolation protocol.
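The core invariant can be sketched with a single atomic counter. This toy version is our illustration only; TiKV's actual timestamp oracle is the PD server, which allocates timestamps in batches and persists a high-water mark so that timestamps stay increasing across restarts.

```rust
use std::sync::atomic::{AtomicU64, Ordering};

struct TimestampOracle {
    next: AtomicU64,
}

impl TimestampOracle {
    fn new() -> Self {
        TimestampOracle { next: AtomicU64::new(1) }
    }

    // Each call returns a timestamp strictly larger than every
    // timestamp handed out before it.
    fn get_ts(&self) -> u64 {
        self.next.fetch_add(1, Ordering::SeqCst)
    }
}
```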
@@ -1,7 +1,11 @@
---
title: Deep Dive
menu:
  docs:
    weight: 3
  nav:
    parent: Docs
    weight: 3
---

[TiKV](https://github.com/tikv/tikv) is a distributed, transactional key-value database. It has been widely adopted in many critical production environments (see the [TiKV adopters](https://github.com/tikv/tikv/blob/master/docs/adopters.md)). It was also accepted by the [Cloud Native Computing Foundation](https://www.cncf.io) as a [Sandbox project](https://www.cncf.io/blog/2018/08/28/cncf-to-host-tikv-in-the-sandbox/) in August 2018.
@@ -1,7 +1,9 @@
---
title: B-Tree vs LSM-Tree
mathjax: true
weight: 1
menu:
  docs:
    parent: Key-value engine
    weight: 1
---

The [B-tree](https://en.wikipedia.org/wiki/B-tree) and the [Log-Structured Merge-tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree) (LSM-tree) are the two most widely used data structures for data-intensive applications to organize and store data. However, each of them has its own advantages and disadvantages. This article aims to use quantitative approaches to compare these two data structures.
@@ -1,6 +1,9 @@
---
title: Key-value engine
weight: 3
menu:
  docs:
    parent: Deep Dive
    weight: 3
---

A key-value engine serves as the bottommost layer in a key-value
@@ -1,6 +1,9 @@
---
title: RocksDB
weight: 2
menu:
  docs:
    parent: Key-value engine
    weight: 2
---

RocksDB is a persistent key-value store for fast storage environments.
@@ -1,6 +1,9 @@
---
title: Resource scheduling
weight: 7
menu:
  docs:
    parent: Deep Dive
    weight: 7
---

In a distributed database environment, resource scheduling needs to meet the following requirements:
@@ -1,6 +1,9 @@
---
title: Kubernetes
weight: 1
menu:
  docs:
    parent: Resource scheduling
    weight: 1
---

Kubernetes is a Docker-based open source container cluster management system initiated and maintained by the Google team. It supports not only common cloud platforms but also internal data centers.
@@ -1,6 +1,9 @@
---
title: Mesos
weight: 2
menu:
  docs:
    parent: Resource scheduling
    weight: 2
---

Mesos was originally launched by UC Berkeley's AMPLab in 2009. It is licensed under the Apache License and now operated by Mesosphere, Inc.
@@ -1,92 +1,95 @@
---
title: Simulator
menu:
  docs:
    parent: Resource scheduling
    weight: 3
---
With its flexibility and its significant benefits in reducing time and cost, the simulator plays an important role in studying and designing a computer architecture. It is often used to validate specific design schemes and evaluate their effectiveness.

## Workflow

We'll focus on how the simulator is used in TiKV to deal with scheduling problems. In general, when there is a lack of resources or a problem is hard to reproduce, we might consider using the simulator.

The simulation of scheduling problems in a distributed system usually consists of the following steps:

1. Define the system model of the simulator.
2. Set up the simulation environment.
3. Run the simulation.
4. Inspect the result to check whether it is in line with expectations.

The first step is mainly to figure out which part of your system you want to simulate, and the model should be as simple as possible. In the second step, you set up the environment, including the scale of your system and the characteristics of the workload. In the third step, the simulation runs and produces the scheduling output. In the final step, you check the result and dig into the scheduling problems if the result is not as expected.

## PD Simulator

In PD, we also need a simulator to locate scheduling problems. The PD simulator can be used to simulate a large-scale cluster and scenarios with different users.

For some special scenarios, we can keep their cases in the simulator so that we can quickly verify the correctness of the scheduling in PD under different scenarios when we refactor the code or add new features in the future. Without the simulator, if we want to reproduce some scenarios, we need to apply for machines, load data, and then wait for the scheduling. It is tedious and can waste a lot of time.

### Architecture

{{< figure
    src="/img/deep-dive/pd-simulator.png"
    caption="PD Simulator Architecture"
    number="1" >}}

### Components

PD Simulator consists of the following components:

- Driver

  _Driver_ is the most important part of PD Simulator. It does some initialization and triggers the heartbeat and the corresponding event according to the tick count.

- Node

  _Node_ is used to simulate a TiKV node. It contains the basic information of a store and can communicate with PD via heartbeats through gRPC.

- Raft Engine

  _Raft Engine_ records all Raft-related information. It is a shared Raft engine which PD cannot know about.

- Event Runner

  For every tick, _Event Runner_ checks if there is an event to execute. If there is, it executes the corresponding event (see the sketch after this list).
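Here is that sketch, an illustrative tick loop only; the struct, field names, and the 10-tick heartbeat interval are our assumptions, not PD Simulator's actual API.

```rust
struct Driver {
    tick: u64,
    // (tick at which to fire, event handler)
    events: Vec<(u64, fn(u64))>,
}

impl Driver {
    fn run(&mut self, total_ticks: u64) {
        for _ in 0..total_ticks {
            self.tick += 1;
            // The driver triggers heartbeats according to the tick count.
            if self.tick % 10 == 0 {
                println!("tick {}: nodes send store/region heartbeats", self.tick);
            }
            // Event Runner: execute any event scheduled for this tick.
            for &(at, handler) in &self.events {
                if at == self.tick {
                    handler(self.tick);
                }
            }
        }
    }
}
```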
### Process

The basic process of how PD Simulator works is as follows:

1. When started, PD Simulator creates a driver and initializes a mocked TiKV cluster which consists of nodes.
2. After PD is bootstrapped, it starts a timer.
3. For each tick, the mocked TiKV cluster performs some operations, such as executing Raft commands on the shared Raft engine or sending heartbeats. The operation to perform depends on the specific case.
4. Finally, PD Simulator verifies whether the result is in line with our expectations.

PD Simulator does not care about how TiKV actually works in detail. It just sends the messages that PD wants to know about.
@@ -1,47 +1,50 @@
---
title: gRPC
menu:
  docs:
    parent: Remote Procedure Calls (RPC)
    weight: 3
---

Sitting on top of HTTP/2 and protocol buffers, gRPC provides a high-performance remote procedure call (RPC) framework. It supports features such as bi-directional streaming and authentication.

gRPC is also part of the [CNCF](https://www.cncf.io/projects/). It is commonly used in applications like Kubernetes and TiKV.

## Servers and Stubs

gRPC works off the idea that there is a *server* node which can accept *requests* from client nodes that use a *stub*.

The *server* implements the service interface and runs a gRPC server to handle all the incoming RPC calls from the client *stubs*. The client *stubs* have the same methods that the server implements, but instead send the request to the server.

{{< figure
    src="/img/deep-dive/grpc-server-stub.svg"
    caption="gRPC Servers and Stubs"
    number="1" >}}

## Streams

Sometimes it's convenient to return or accept a stream of messages, instead of operating in the traditional 1:1 request-response format.

gRPC supports both traditional 1:1 and streaming modes, enabling bi-directional streaming so clients and servers can write in whatever order they like. gRPC maintains ordering behind the scenes.

```protobuf
service StreamScan {
    // An rpc method takes and returns message types, so the two strings
    // and the streamed reply are wrapped in messages here (Range and
    // Value are illustrative names).
    rpc BatchGet (Range) returns (stream Value) {}
}
```

## A Rust Crate

One of the TiKV maintaining organizations, PingCAP, maintains the [`grpc-rs`](https://github.com/pingcap/grpc-rs) library, providing Rust bindings to a C library for gRPC. TiKV uses this crate.
@@ -1,95 +1,98 @@
---
title: HTTP/2
menu:
  docs:
    parent: Remote Procedure Calls (RPC)
    weight: 1
---

HTTP/2 is short for 'Hyper Text Transfer Protocol Version 2'. HTTP/2 and its predecessor, HTTP/1, are the de facto network protocols in use today.

They define things such as:

* [URIs]
* [Sessions]
* [Status Codes] (e.g. 404 Not Found)
* [Methods] (e.g. GET, POST, PUT)
* [Headers] (e.g. Authorization, User-Agent)
* [Pipelining]

HTTP/2 evolved out of Google's experimental SPDY protocol as a successor to HTTP/1.1. Since it maintains high-level compatibility with HTTP/1.1, let's first take a look at HTTP/1.x.

## HTTP/1.x

HTTP/1.0 was developed at CERN in 1989. [IETF RFC 1945] in 1996 was the first officially recognized HTTP/1.0 version. Just a few years later, HTTP/1.1 was specified in [IETF RFC 2068]. The standard was improved in [IETF RFC 2616] in 1999.

Both of these specifications were eventually obsoleted in 2014 by the following RFCs:

* [RFC 7230, HTTP/1.1: Message Syntax and Routing](https://tools.ietf.org/html/rfc7230)
* [RFC 7231, HTTP/1.1: Semantics and Content](https://tools.ietf.org/html/rfc7231)
* [RFC 7232, HTTP/1.1: Conditional Requests](https://tools.ietf.org/html/rfc7232)
* [RFC 7233, HTTP/1.1: Range Requests](https://tools.ietf.org/html/rfc7233)
* [RFC 7234, HTTP/1.1: Caching](https://tools.ietf.org/html/rfc7234)
* [RFC 7235, HTTP/1.1: Authentication](https://tools.ietf.org/html/rfc7235)

Thankfully, it's not necessary to become familiar with every detail of these RFCs. HTTP/1.x is a request-response protocol, and for now, we'll just focus on that.

### The Request

When a node is connected to another node via TCP/IP, it can make a request by sending ASCII text as below (the blank line is required!):

```HTTP
POST / HTTP/1.1
Host: tikv.org

{ data: "Test" }
```

Common headers are things like `Authorization` (for access control) and `Cache-Control` (for caching).

### The Response

The recipient of a message responds in a similar format:

```HTTP
HTTP/1.1 200 OK
Content-Type: application/json; charset=UTF-8
Connection: close

{ data: "bar" }
```

## HTTP/2

The major differences in HTTP/2 do not lie in aspects like methods, status codes, or URIs, but in data framing and transport.

It builds on HTTP/1.1 by adding features like:

* Server Push, to allow the server to respond with more data than requested.
* Multiplexing, to avoid the 'head-of-line' blocking problem in HTTP/1.1.
* Pipelining, to reduce network wait time.
* Compression of headers, to reduce overall network costs.

Compared to HTTP/1.1, HTTP/2 offers applications like TiKV many opportunities for performance gains.

[IETF RFC 1945]: https://tools.ietf.org/html/rfc1945
[IETF RFC 2068]: https://tools.ietf.org/html/rfc2068
[IETF RFC 2616]: https://tools.ietf.org/html/rfc2616
[IETF RFC 7230]: https://tools.ietf.org/html/rfc7230
[IETF RFC 7231]: https://tools.ietf.org/html/rfc7231
[IETF RFC 7232]: https://tools.ietf.org/html/rfc7232
[IETF RFC 7233]: https://tools.ietf.org/html/rfc7233
[IETF RFC 7234]: https://tools.ietf.org/html/rfc7234
[IETF RFC 7235]: https://tools.ietf.org/html/rfc7235
[URIs]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier
[Sessions]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#HTTP_session
[Status Codes]: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
[Methods]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods
[Headers]: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
[Pipelining]: https://en.wikipedia.org/wiki/HTTP_pipelining
@@ -1,66 +1,69 @@
---
title: Remote Procedure Calls (RPC)
menu:
  docs:
    parent: Deep Dive
    weight: 6
---

Communication between services occurs over remote procedure calls. RPCs happen all the time in distributed systems. To obtain a webpage, your browser has to make at least one RPC to this website.

TiKV, as a distributed system involving a number of nodes, uses RPCs to communicate between nodes, as well as with the Placement Driver and clients.

As it turns out, exposing functionality as a remote interface isn't trivial. Networks are unreliable, systems are diverse and evolving, a huge variety of languages and formats exist, and even things like encoding are hard!

## On the shoulders of giants

Over the past decades, the field of computing has largely settled on a few common standards.

### Network Protocols

The vast majority of services work over the HTTP or HTTP/2 network protocols. This solves problems such as:

* [URIs]
* [Sessions]
* [Status Codes] (e.g. 404 Not Found)
* [Methods] (e.g. GET, POST, PUT)
* [Headers] (e.g. Authorization, User-Agent)
* [Pipelining]

TiKV uses **HTTP/2**, which is more performant and capable than HTTP/1 for TiKV's uses.

### Interfacing

With those abilities supported, there remains a need to work with structured data. Commonly this ends up being an *interface description language* like [Protocol Buffers] (protobufs), which TiKV uses. Unlike an *interchange format*, an *interface description language* allows for the definition of services and RPCs in addition to just data interchange. This solves the following problems:

* [Encoding] (ASCII? UTF-8? UTF-16? WTF-8?)
* Serialization/deserialization format (text-to-`struct` and vice versa)
* Backward/forward compatibility (e.g. structure fields changing, being added, or removed)
* Service & RPC definition

### Wrapping it all together

Simply having the pieces is not enough. Making them usable for all parties involved is another story. [gRPC] does a great job of wrapping up the above technologies and providing usable interfaces.

Over the next chapter, we'll look at each of these technologies and how they work.

[gRPC]: https://grpc.io/
[Encoding]: https://en.wikipedia.org/wiki/Character_encoding
[Protocol Buffers]: https://developers.google.com/protocol-buffers/
[URIs]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier
[Sessions]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#HTTP_session
[Status Codes]: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
[Methods]: https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods
[Headers]: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
[Pipelining]: https://en.wikipedia.org/wiki/HTTP_pipelining
@ -1,71 +1,74 @@
---
title: Protocol Buffers
menu:
  docs:
    parent: Remote Procedure Calls (RPC)
    weight: 2
---
Interface definition languages, such as protobufs, are most commonly used to store and transmit data between applications.

They define a way to *serialize* structures, as well as to *deserialize* them again.
Here's an example of a protobuf message in *text format*:

```protobuf
KvPair {
    key: "TiKV"
    value: "Astronaut"
}
```
When a message is actually *sent* between two applications, a binary format is used. You can learn more about the binary format in the [protobuf documentation](https://developers.google.com/protocol-buffers/docs/encoding).
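For a sense of how compact that is, here is the `KvPair` above hand-encoded in the standard wire format (each field is a tag byte, a length byte, then the raw bytes). This is an illustrative sketch, not generated output:

```rust
// The KvPair message above, written out in protobuf's binary wire format.
const ENCODED: &[u8] = &[
    0x0a, 0x04, // field 1 (key), wire type 2 (length-delimited), length 4
    b'T', b'i', b'K', b'V',
    0x12, 0x09, // field 2 (value), wire type 2, length 9
    b'A', b's', b't', b'r', b'o', b'n', b'a', b'u', b't',
];
```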
The message above is an instance of the `KvPair` structure defined in a `.proto` file:
```protobuf
message KvPair {
    string key = 1;
    string value = 2;
}
```
The fields are numbered to support backwards compatibility and field renaming. This makes it possible to evolve your application's communication in a compatible way.
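For instance, a later revision of the schema could add a field under a previously unused number. This hypothetical `KvPair` v2 is only an illustration, not part of TiKV's actual schema:

```protobuf
message KvPair {
    string key = 1;   // numbers 1 and 2 keep their original meaning,
    string value = 2; // so old and new binaries stay compatible
    uint64 ttl = 3;   // new field under a fresh number; old readers skip it
}
```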
## Why Protobufs

Protobufs are much faster to process and more compact on the wire than text formats like JSON. Additionally, the protobuf compiler can generate all the required serialization code for your desired language.

If you have used [`serde_json`](https://docs.serde.rs/serde_json/) or another JSON library, you may have experienced the task of defining schemas for structures. This becomes a maintenance burden as your infrastructure grows to span many languages.

You need to define schemas with protocol buffers as well, but you only do it once, and the protobuf compiler will generate bindings for any language it supports.
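As a minimal sketch, assuming Rust bindings generated with a tool like [`prost`](https://docs.rs/prost) (TiKV's actual code generation setup may differ), round-tripping a message takes only a few lines:

```rust
use prost::Message;

// `KvPair` stands in for the struct a generator like prost would
// produce from the schema above.
fn roundtrip(pair: &KvPair) -> Result<KvPair, prost::DecodeError> {
    // Serialize to the compact binary wire format...
    let mut buf = Vec::new();
    pair.encode(&mut buf).expect("encoding into a Vec cannot fail");

    // ...and parse it back, as any language with generated bindings could.
    KvPair::decode(buf.as_slice())
}
```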
Protobuf generates code in a backwards compatible manner: if an application encounters fields it isn't familiar with, it simply ignores them. This allows an API to evolve safely.
## More than just data

Protobufs also enable the definition of *services*, which allows RPC calls to be declared in `.proto` files.

This example demonstrates a service called `ScanService`. It provides a remote procedure call `Scan` that accepts a request message wrapping two strings (RPC methods take exactly one message type) and returns a stream of `KvPair`s:
```protobuf
message ScanRequest {
    string start_key = 1;
    string end_key = 2;
}

service ScanService {
    rpc Scan (ScanRequest) returns (stream KvPair) {}
}
```
This is particularly useful as it allows users to call remote functions almost as if they were local, thanks to code generation.

Next, we'll use gRPC to provide these services.
@ -1,6 +1,9 @@
---
title: Data Sharding
weight: 2
menu:
  docs:
    parent: Scalability
    weight: 2
---

## What is a partition?
@ -1,6 +1,9 @@
---
title: Horizontal or Vertical
weight: 1
menu:
  docs:
    parent: Scalability
    weight: 1
---

Methods of adding more resources for a particular application fall into two broad categories: horizontal and vertical scaling.
@ -1,6 +1,9 @@
---
title: Scalability
weight: 5
menu:
  docs:
    parent: Deep Dive
    weight: 5
---

In the database field, scalability is the term we use to describe a system's capability to handle a growing amount of work. Even if a system works reliably and fast today, that doesn't mean it will necessarily work well in the future. One common reason for degradation is increased load that exceeds what the system can process. In modern systems, the amount of data we handle can far outgrow our original expectations, so scalability is a critical consideration in the design of a database.
@ -1,7 +1,9 @@
---
title: Multi-raft
mathjax: true
weight: 3
menu:
  docs:
    parent: Scalability
    weight: 3
---

If you've researched consensus algorithms before, please note that comparing Multi-Raft to Raft is not at all like comparing Multi-Paxos to Paxos. Here, Multi-Raft only means that we manage multiple Raft consensus groups on one node. From the previous section, we know that there are multiple different partitions on each node; if each node ran only a single Raft group, the partitions would lose their meaning. So the data is divided into multiple Raft groups along partition boundaries, each of which is called a Region.
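As a rough sketch (hypothetical types for illustration, not TiKV's actual implementation), a node can be pictured as a collection of independent Raft state machines keyed by Region:

```rust
use std::collections::HashMap;

// A Region is a contiguous range of the key space.
struct Region {
    id: u64,
    start_key: Vec<u8>,
    end_key: Vec<u8>,
}

// Each Region is replicated by its own Raft group, with its own log,
// peers, and applied state.
struct RaftGroup {
    region: Region,
    // log, peers, applied index, ...
}

// A single node hosts many Raft groups side by side, one per Region
// it holds a replica of.
struct Node {
    raft_groups: HashMap<u64, RaftGroup>,
}
```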
@ -0,0 +1,8 @@
---
title: Failure Injection
menu:
  docs:
    parent: Testing
    weight: 1
---
@ -0,0 +1,72 @@
---
title: Testing
menu:
  docs:
    parent: Deep Dive
    weight: 9
---
Testing is a crucial part of any large software project. A distributed system like TiKV is designed to exist and function under many failure scenarios and degraded states, which dramatically increases the testing surface: these states must be tested in addition to the normal testing done on a project.

In further sections, we'll investigate how we can test distributed systems even with these nearly infinite variables affecting system state. For now, let's investigate the basics required for almost every project. These simple tests form the foundation for many later tests, and can be run millions of times over the life of a project.

Often, these tests are written before or alongside the code they test, and they're used to guide the development process.
## Unit testing

TiKV includes many unit tests that are run using `cargo test`. These tests typically cover small pieces of functionality and verify them using assertions.

These tests are most useful for checking expected errors (e.g., trying to open a config file that doesn't exist) and for ensuring operations succeed given the correct initial state. They are also very helpful in guarding against regressions of bugs that other testing methods have found.
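For example, an expected-error test might look like the following minimal sketch (`Config::from_file` is a hypothetical constructor used only for illustration):

```rust
#[test]
fn missing_config_file_is_an_error() {
    // Loading a config file that doesn't exist should surface an error
    // to the caller rather than panicking.
    let result = Config::from_file("/path/that/does/not/exist.toml");
    assert!(result.is_err());
}
```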
Unit tests are notably able to test project internals. Unlike other testing methods, which exercise the project as a *consumer* would, unit tests allow you to make assertions about internal state that is normally hidden from consumers of your project.

Many languages, like Rust, offer built-in unit testing functionality:
```rust
#[test]
fn can_initialize_server() -> Result<(), Error> {
    let server = Server::new(Config::default())?;
    // Unit tests can assert on private state that consumers never see.
    assert_eq!(server.private_field, true);
    Ok(())
}
```
One potential danger of unit tests is that it's very easy to accidentally modify private fields, or to call private functionality that a dependent project would not be able to use. When testing functionality covering more than a few functions, or more than one module, integration tests are better suited.
## Integration testing

Projects like TiKV are used as components in larger infrastructure. When doing more complete, functional testing, integration tests offer an easy way to test the public functionality of a project.

In Rust, integration tests can also exist as documentation; this means it's possible to document your project and benefit from a full test suite at the same time.
```rust
/// Create a server with the given config.
///
/// ```rust
/// let server = Server::new(Config::default()).unwrap();
/// ```
struct Server { /* ... */ }
```
These kinds of tests are useful for ensuring that a consumer of the project will be able to use it correctly. Having a documentation test suite that demonstrates how a consumer is expected to use the project is especially useful for detecting functionality, API, or compatibility breakage. These tests eliminate most trivial unit tests while remaining worthwhile to write, since readers will use them to learn the project.
While documentation-based integration tests cover *usability* and *functionality*, they're not always suited to testing corner cases or workloads. Rust's built-in integration tests are better suited to that task.
The [Rust Book](https://doc.rust-lang.org/book/ch11-01-writing-tests.html) has a great chapter on how to write tests in Rust, and which testing strategies are appropriate for which problems.
## Going further

Unit tests and integration tests cover the basics, but even with a comprehensive test suite there can be cases that are neglected, forgotten, or not even realized to be possible.

Here are just a few of the situations that can happen:

* A node disappears and is replaced by a new node at the same IP
* Messages between one node and another are simply lost
* The network partitions a cluster into two or more groups
* A network link becomes overloaded, and messages start to queue and eventually fail
* An expected service dependency suddenly disappears
* Thread scheduling and poor memory management lead to data integrity issues

Even this small list offers a nearly endless testing surface. It's not practical to test every possibility. Worse, many of these cases are very difficult to set up in a test.

In the rest of this chapter, we'll investigate how we can overcome this problem by using tools to inject a variety of failures and introduce chaos into networks, I/O, and other parts of the system under test; the sketch below gives a flavor of the approach.
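As a toy illustration (hypothetical types, not TiKV's actual test machinery), a transport wrapper can be instructed to drop messages so a test can simulate a lossy network link:

```rust
// Illustrative stand-ins for a real message and transport.
struct Message(Vec<u8>);

trait Transport {
    fn send(&mut self, msg: Message) -> Result<(), String>;
}

// Wraps any transport and silently drops the next `drop_remaining`
// messages, letting a test assert the system still converges when
// the network loses traffic.
struct LossyTransport<T> {
    inner: T,
    drop_remaining: usize,
}

impl<T: Transport> Transport for LossyTransport<T> {
    fn send(&mut self, msg: Message) -> Result<(), String> {
        if self.drop_remaining > 0 {
            self.drop_remaining -= 1;
            // Pretend the send succeeded: the network "lost" the message.
            return Ok(());
        }
        self.inner.send(msg)
    }
}
```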
@ -1,6 +0,0 @@
---
title: Reference
headless: true
draft: true
weight: 3
---
@ -1,5 +0,0 @@
---
title: Tools
draft: true
weight: 1
---
@ -1,5 +0,0 @@
---
title: Tasks
headless: true
weight: 2
---
@ -1,25 +0,0 @@
{{ define "title" }}
TiKV deep dive | {{ .Title }}
{{ end }}

{{ define "main" }}
<section class="section">
  <div class="container">
    <div class="columns">
      <div class="column is-narrow">
        {{ partial "deep-dive/toc.html" . }}
      </div>

      <div class="column">
        <p class="title is-size-1 is-size-2-mobile has-text-weight-light">
          {{ .Title }}
        </p>

        <div class="content is-medium docs-content">
          {{ .Content }}
        </div>
      </div>
    </div>
  </div>
</section>
{{ end }}
@ -1,26 +0,0 @@
{{ define "title" }}
TiKV deep dive | {{ .Title }}
{{ end }}

{{ define "main" }}
<section class="section">
  <div class="container">
    <div class="columns">
      <div class="column is-3">
        {{ partial "deep-dive/toc.html" . }}
      </div>

      <div class="column">
        <p class="title is-size-1 is-size-2-mobile has-text-weight-light">
          {{ .Title }}
        </p>

        <div class="content is-medium docs-content">
          {{ partial "math.html" . }}
          {{ .Content }}
        </div>
      </div>
    </div>
  </div>
</section>
{{ end }}
@ -4,17 +4,27 @@ TiKV | {{ .Title }}
{{ define "main" }}
{{ partial "docs/hero.html" . }}
{{ $docs := .Site.Menus.docs }}
{{ $currentPage := (index .Pages 0) }}

<section class="section">
  <div class="container">
    <div class="columns">
      <div class="column is-3">
        {{ partial "docs/section-toc.html" . }}
        <div class="toc">
          {{ range .Site.Menus.docs }}
          {{ $submenu := (index (where $docs "Name" .Name) 0) }}
          {{ if (or ($currentPage.HasMenuCurrent "docs" $submenu) ($currentPage.IsMenuCurrent "docs" $submenu)) }}
          {{ partial "entry-tree.html" (dict "entries" .Children "currentPage" $currentPage ) }}
          {{ end }}
          {{ end }}
        </div>
      </div>

      <div class="column">
        <div class="content is-medium docs-content">
          {{ partial "docs/version-warning.html" . }}
          {{ partial "math.html" . }}

          {{ .Content }}
        </div>
@ -4,17 +4,27 @@ TiKV | {{ .Title }}
{{ define "main" }}
{{ partial "docs/hero.html" . }}
{{ $docs := .Site.Menus.docs }}
{{ $currentPage := . }}

<section class="section">
  <div class="container">
    <div class="columns">
      <div class="column is-3">
        {{ partial "docs/section-toc.html" . }}
      <div class="column is-narrow">
        <div class="toc">
          {{ range .Site.Menus.docs }}
          {{ $submenu := (index (where $docs "Name" .Name) 0) }}
          {{ if (or ($currentPage.HasMenuCurrent "docs" $submenu) ($currentPage.IsMenuCurrent "docs" $submenu)) }}
          {{ partial "entry-tree.html" (dict "entries" .Children "currentPage" $currentPage ) }}
          {{ end }}
          {{ end }}
        </div>
      </div>

      <div class="column">
        <div class="content is-medium docs-content">
          {{ partial "docs/version-warning.html" . }}
          {{ partial "math.html" . }}

          {{ .Content }}
        </div>
@ -1,46 +0,0 @@
{{ $deepDive := where site.Sections "Section" "deep-dive" }}
{{ $currentUrl := .RelPermalink }}
<div class="deep-dive-toc">
  <p class="title is-narrow has-text-weight-bold">
    TiKV deep dive
  </p>

  <hr class="has-background-primary" />

  <ul>
    {{ range $deepDive }}
    {{ $isCurrentPage := eq $currentUrl .RelPermalink }}
    <li{{ if $isCurrentPage }} class="is-active"{{ end }}>
      <strong>{{ .Weight }}</strong>.
      <a href="{{ .RelPermalink }}">
        {{ .Title }}
      </a>
    </li>

    {{ range .Sections }}
    {{ $isCurrentPage := eq $currentUrl .RelPermalink }}
    {{ $sectionWeight := .Weight }}
    <li{{ if $isCurrentPage }} class="is-active"{{ end }}>
      <strong>{{ $sectionWeight }}</strong>.
      <a href="{{ .RelPermalink }}">
        {{ .Title }}
      </a>
    </li>

    <li>
      <ul>
        {{ range .Pages }}
        {{ $isCurrentPage := eq $currentUrl .RelPermalink }}
        <li{{ if $isCurrentPage }} class="is-active"{{ end }}>
          <strong>{{ $sectionWeight }}.{{ .Weight }}</strong>.
          <a href="{{ .RelPermalink }}">
            {{ .Title }}
          </a>
        </li>
        {{ end }}
      </ul>
    </li>
    {{ end }}
    {{ end }}
  </ul>
</div>
@ -31,16 +31,13 @@
<nav class="tabs is-medium is-boxed">
  <div class="container">
    <ul>
      {{ range $docsSections }}
      {{ range .Sections }}
      {{ $mainPage := index (where .Pages "Weight" 1) 0 }}
      {{ $isCurrentSection := eq .Name $currentSection }}
      <li{{ if $isCurrentSection }} class="is-active"{{ end }}>
        <a href="{{ $mainPage.RelPermalink }}">
          {{ .Name }}
        </a>
      </li>
      {{ end }}
      {{ $currentPage := . }}
      {{ range .Site.Menus.docs }}
      <li {{ if (or ($currentPage.IsMenuCurrent "docs" .) ($currentPage.HasMenuCurrent "docs" .)) }}class="is-active"{{ end }}>
        <a href="{{ .URL }}">
          {{ .Name }}
        </a>
      </li>
      {{ end }}
    </ul>
  </div>
@ -1,15 +0,0 @@
{{ $allDocs := where site.RegularPages "Section" "docs" }}
{{ $thisSection := .CurrentSection.Name }}
{{ $currentUrl := .RelPermalink }}
<div class="section-toc">
  <ul>
    {{ range where $allDocs "CurrentSection.Name" $thisSection }}
    {{ $isCurrentPage := eq .RelPermalink $currentUrl }}
    <li{{ if $isCurrentPage }} class="is-active"{{ end }}>
      <a href="{{ .RelPermalink }}">
        {{ .Title }}
      </a>
    </li>
    {{ end }}
  </ul>
</div>
@ -0,0 +1,57 @@
{{ $entries := .entries }}
{{ $currentPage := .currentPage }}

<ul>
  {{ range $entries }}
  {{ if .HasChildren }}
  <li>
    <a {{ if (or ($currentPage.IsMenuCurrent .Menu .) ($currentPage.HasMenuCurrent .Menu .)) }}class="is-active"{{ end }} href="{{ .URL }}">
      {{ .Name }}
    </a>
    <ul>
      {{ $parent := . }}
      {{ range .Children }}
      {{ if .HasChildren }}
      <li>
        <a
          {{ if (or ($currentPage.IsMenuCurrent .Menu .) ($currentPage.HasMenuCurrent .Menu .)) }}class="is-active"{{ end }}
          href="{{ .URL }}">
          {{ .Name }}
        </a>
        <ul>
          {{ $parent = . }}
          {{ range .Children }}
          <li>
            <a
              {{ if $currentPage.IsMenuCurrent .Menu . }}class="is-active"{{ end }}
              href="{{ .URL }}">
              {{ .Name }}
            </a>
          </li>
          {{ end }}
        </ul>
      </li>
      {{ else }}
      <li>
        <a
          {{ if $currentPage.IsMenuCurrent .Menu . }}class="is-active"{{ end }}
          href="{{ .URL }}">
          {{ .Name }}
        </a>
      </li>
      {{ end }}
      {{ end }}
    </ul>
  </li>
  {{ else }}
  <li>
    <a
      {{ if $currentPage.IsMenuCurrent .Menu . }}class="is-active"{{ end }}
      href="{{ .URL }}">
      <span>{{ .Name }}</span>
    </a>
  </li>
  {{ end }}
  {{ end }}
</ul>
@ -18,13 +18,26 @@
<hr class="hr has-background-primary">

<ul>
  {{ range $docs }}
  <li>
    <a href="{{ .RelPermalink }}">
      {{ .Title }}
    <a href="/docs/{{ $latest }}/">
      Overview
    </a>
  </li>
  <li>
    <a href="/docs/{{ $latest }}/concepts">
      Concepts
    </a>
  </li>
  <li>
    <a href="/docs/{{ $latest }}/tasks">
      Tasks
    </a>
  </li>
  <li>
    <a href="/docs/{{ $latest }}/reference">
      Reference
    </a>
  </li>
  {{ end }}
</ul>
</div>
@ -5,7 +5,6 @@
{{ $navbarLogo := cond $isHome $blackLogo $whiteLogo }}
{{ $twitter := site.Params.socialmedia.twitter }}
{{ $github := site.Params.socialmedia.github }}
{{ $docs := where site.RegularPages "Section" "docs" }}
{{ $latest := site.Params.versions.latest }}
{{ $color := cond $isHome "light" "black" }}
{{ $blogPosts := where site.RegularPages "Section" "blog" }}
@ -40,60 +39,23 @@
</div>

<div class="navbar-end">
  <div class="navbar-item has-dropdown is-hoverable">
    <a class="navbar-link" href="/docs">
      Docs
    </a>

    <div class="navbar-dropdown is-hidden-touch">
      {{ range $docs }}
      <a class="navbar-item" href="{{ .RelPermalink }}">
        {{ .Title }}
      </a>
      {{ end }} <!-- range $docs -->
    </div>
  </div>

  <div class="navbar-item has-dropdown is-hoverable">
    <a class="navbar-link" href="/deep-dive/">
      Deep dive
    </a>

    <div class="navbar-dropdown is-hidden-touch">
      {{ range $deepDive }}
      <a class="navbar-item" href="{{ .RelPermalink }}">
        <strong>{{ .Weight }}</strong>. {{ .Title }}
      {{ range .Site.Menus.nav }}
      <div class="navbar-item has-dropdown is-hoverable">
        <a class="navbar-link" href="{{ .URL }}">
          {{ .Name }}
        </a>

        {{ range .Sections }}
        <a class="navbar-item" href="{{ .RelPermalink }}">
          <strong>{{ .Weight }}</strong>. {{ .Title }}
        </a>
        {{ end }}
        {{ if .HasChildren }}
        <div class="navbar-dropdown is-hidden-touch">
          {{ range .Children }}
          <a class="navbar-item" href="{{ .URL }}">
            {{ .Name }}
          </a>
          {{ end }}
        </div>
        {{ end }}
      </div>
    </div>

    <div class="navbar-item has-dropdown is-hoverable">
      <div class="navbar-link">
        Community
      </div>

      <div class="navbar-dropdown">
        <a class="navbar-item" href="https://forum.tikv.org" target="_blank">
          Forum
        </a>
        <a class="navbar-item" href="/chat" target="_blank">
          Chat
        </a>
        <a class="navbar-item" href="/adopters">
          Adopters
        </a>
        <a class="navbar-item" href="https://branding.cncf.io/projects/tikv" target="_blank">
          Branding
        </a>
      </div>
    </div>
    {{ end }}

    <div class="navbar-item has-dropdown is-hoverable">
      <a class="navbar-link" href="/blog">