Merge branch 'main' into developing_md

Iramis Valentin authored on 2025-03-26 14:25:28 -04:00, committed via GitHub.
Commit 15b51dd93c
34 changed files with 286 additions and 138 deletions

.gitignore (8 changed lines)
View File

@ -2,10 +2,10 @@
.DS_Store
# tofu related
tofu/main/*/.terraform
tofu/main/*/.terraform.lock.hcl
tofu/main/*/terraform.tfstate
tofu/main/*/terraform.tfstate.*
tofu/**/*/.terraform
tofu/**/*/.terraform.lock.hcl
tofu/**/*/terraform.tfstate
tofu/**/*/terraform.tfstate.*
tofu/main/*/*config
*.tfvars

View File

@ -22,6 +22,18 @@ To recreate environments:
- `dartboard reapply` runs `destroy` and then `apply`, tearing down and recreating the test configuration infrastructure without any software (Rancher, load generation, monitoring...)
- `dartboard redeploy` runs `destroy` and then `deploy`, tearing down and recreating the full environment, infrastructure and software (use this if unsure)
### "Bring Your Own" AWS VPC
Some manual configuration is required to use an existing AWS VPC instead of having the tofu modules create a full set of networking resources.
1. Have an existing VPC with a DHCP options set configured so that DNS = "AmazonProvidedDNS".
2. Create three subnets that meet the following requirements:
   1. One subnet's name should contain the substring "public" (case-sensitive), and the subnet should be tagged with `Tier = Public` (case-sensitive)
   2. One subnet's name should contain the substring "private" (case-sensitive), and the subnet should be tagged with `Tier = Private` (case-sensitive)
   3. One subnet's name should contain the substring "secondary-private" (case-sensitive), and the subnet should be tagged with `Tier = SecondaryPrivate` (case-sensitive)
   4. All three subnets must be assigned to the VPC you intend to use
Once these resources are set up (for example with the AWS CLI, as sketched below), set the `existing_vpc_name` tofu variable in your Dart file and deploy as you normally would.
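A minimal sketch of the required tagging with the AWS CLI; the subnet IDs and `Name` values are placeholders, and any naming scheme works as long as it satisfies the substring and `Tier` requirements above. Note that the VPC itself must carry a `Name` tag equal to the value you put in `existing_vpc_name`, since the module looks it up by that tag.
```shell
# Tag the three pre-existing subnets so the tofu modules can find them.
# Subnet IDs and Name values are placeholders; keep "public", "private" and
# "secondary-private" in the names and the Tier tags exactly as shown.
aws ec2 create-tags --resources subnet-0aaaaaaaaaaaaaaaa \
  --tags Key=Name,Value=my-vpc-public-subnet Key=Tier,Value=Public
aws ec2 create-tags --resources subnet-0bbbbbbbbbbbbbbbb \
  --tags Key=Name,Value=my-vpc-private-subnet Key=Tier,Value=Private
aws ec2 create-tags --resources subnet-0cccccccccccccccc \
  --tags Key=Name,Value=my-vpc-secondary-private-subnet Key=Tier,Value=SecondaryPrivate
```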
## Installation
Download and unpack a [release](https://github.com/rancher/dartboard/releases/); it is a self-contained binary.
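For example, on Linux (the asset name below is illustrative; pick the archive matching your OS and architecture from the releases page):
```shell
# Download and unpack a release; <version> and the asset name are placeholders.
curl -LO https://github.com/rancher/dartboard/releases/download/<version>/<dartboard-asset>.tar.gz
tar -xzf <dartboard-asset>.tar.gz
./dartboard deploy   # deploys infrastructure and software end to end
```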
@ -80,7 +92,7 @@ pkill -f 'ssh .*-o IgnoreUnknown=TofuCreatedThisTunnel.*'
If an Azure VM is not accessible via SSH, try the following:
- add the `boot_diagnostics = true` option in `inputs.tf`
- apply or re-deploy
- in the Azure Portal, click on Home -> Virtual Machines -> <name> -> Help -> Reset Password
- then Home -> Virtual Machines -> <name> -> Help -> Serial Console
That should give you access to the VM's console, where you can log in with the new password and troubleshoot.
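The same steps can also be performed from the Azure CLI; a sketch follows, assuming the CLI is installed and logged in. VM and resource group names are placeholders, and the serial console command requires the `serial-console` extension.
```shell
# Enable boot diagnostics for the VM (managed storage is used by default)
az vm boot-diagnostics enable --resource-group <resource-group> --name <vm-name>
# Reset the admin password through the VMAccess extension
az vm user update --resource-group <resource-group> --name <vm-name> \
  --username <admin-user> --password '<new-password>'
# Attach to the serial console (first run: az extension add --name serial-console)
az serial-console connect --resource-group <resource-group> --name <vm-name>
```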

View File

@ -114,7 +114,7 @@ func Deploy(cli *cli.Context) error {
return GetAccess(cli)
}
func chartInstall(kubeConf string, chart chart, vals map[string]any) error {
func chartInstall(kubeConf string, chart chart, vals map[string]any, extraArgs ...string) error {
var err error
name := chart.name
@ -126,7 +126,7 @@ func chartInstall(kubeConf string, chart chart, vals map[string]any) error {
log.Printf("Installing chart %q (%s)\n", namespace+"/"+name, path)
if err = helm.Install(kubeConf, path, name, namespace, vals); err != nil {
if err = helm.Install(kubeConf, path, name, namespace, vals, extraArgs...); err != nil {
return fmt.Errorf("chart %s: %w", name, err)
}
return nil
@ -190,7 +190,29 @@ func chartInstallRancher(r *dart.Dart, rancherImageTag string, cluster *tofu.Clu
chartVals := getRancherValsJSON(r.ChartVariables.RancherImageOverride, rancherImageTag, r.ChartVariables.AdminPassword, rancherClusterName, rancherClusterURL, r.ChartVariables.RancherReplicas)
return chartInstall(cluster.Kubeconfig, chartRancher, chartVals)
var extraArgs []string
if r.ChartVariables.RancherValues != "" {
p, err := writeValuesFile(r.ChartVariables.RancherValues)
if err != nil {
return fmt.Errorf("writing extra values file: %w", err)
}
defer os.Remove(p)
extraArgs = append(extraArgs, "-f", p)
}
return chartInstall(cluster.Kubeconfig, chartRancher, chartVals, extraArgs...)
}
func writeValuesFile(content string) (string, error) {
p, err := os.CreateTemp("", "values-*.yaml")
if err != nil {
return "", err
}
defer p.Close()
if _, err := io.WriteString(p, content); err != nil {
os.Remove(p.Name())
return "", err
}
return p.Name(), nil
}
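The net effect is that any `rancher_values` content is written to a temporary file and passed to Helm with `-f`, on top of the values dartboard already sets via `--set-json`. Roughly equivalent to the following Helm invocation (flags other than `--set-json` and `-f` are assumptions; the release name, namespace and paths are illustrative):
```shell
helm upgrade --install rancher <path-to-rancher-chart> \
  --kubeconfig <kubeconfig> --namespace cattle-system \
  --set-json='<values computed by dartboard>' \
  -f /tmp/values-123456789.yaml   # temporary file holding rancher_values
# The merged values can be inspected afterwards with:
helm get values rancher --namespace cattle-system --kubeconfig <kubeconfig>
```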
func chartInstallRancherIngress(cluster *tofu.Cluster) error {

View File

@ -162,8 +162,7 @@ func importImageIntoK3d(tf *tofu.Tofu, image string, cluster tofu.Cluster) error
}
if len(images) > 0 {
err = k3d.ImageImport(cluster, images[0])
if err != nil {
if err := k3d.ImageImport(cluster.Name, images[0]); err != nil {
return err
}
}
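With this change the wrapper passes the k3d cluster name straight through, i.e. it runs the equivalent of (cluster and image names are placeholders):
```shell
k3d image import --cluster <cluster-name> <image>
```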

View File

@ -77,6 +77,10 @@ chart_variables:
# rancher_image_override: rancher/rancher
# rancher_image_tag_override: v2.8.6-debug-1
# Set arbitrary helm values (in yaml format) for installing Rancher
# rancher_values: |
# features: "my-feature-flag=true"
test_variables:
test_config_maps: 2000
test_secrets: 2000

View File

@ -103,6 +103,10 @@ chart_variables:
# rancher_image_override: rancher/rancher
# rancher_image_tag_override: v2.8.6-debug-1
# Set arbitrary helm values (in yaml format) for installing Rancher
# rancher_values: |
# features: "my-feature-flag=true"
test_variables:
test_config_maps: 2000
test_secrets: 2000

View File

@ -59,6 +59,11 @@ chart_variables:
# rancher_image_override: rancher/rancher
# rancher_image_tag_override: v2.8.6-debug-1
# Set arbitrary helm values (in yaml format) for installing Rancher
# rancher_values: |
# features: "my-feature-flag=true"
test_variables:
test_config_maps: 2000
test_secrets: 2000

View File

@ -43,7 +43,7 @@ See [the rke2 installation script in this repo](../rke2/install_rke2.sh) for det
## Full configuration details
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20220923_250_pods_per_node](https://github.com/moio/scalability-tests/tree/20220923_250_pods_per_node) branch.
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20220923_250_pods_per_node](https://github.com/rancher/dartboard/tree/20220923_250_pods_per_node) branch.
Note in particular [inputs.tf](../inputs.tf) for the main parameters.
@ -58,7 +58,7 @@ Note in particular [inputs.tf](../inputs.tf) for the main parameters.
- get [Terraform](https://www.terraform.io/downloads)
- check out this project
```shell
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20220923_250_pods_per_node
```

View File

@ -45,7 +45,7 @@ See [the rke2 installation script in this repo](../rke2/install_rke2.sh) for det
## Full configuration details
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20220928_250_pods_per_node_with_cluster_operations](https://github.com/moio/scalability-tests/tree/20220928_250_pods_per_node_with_cluster_operations) branch.
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20220928_250_pods_per_node_with_cluster_operations](https://github.com/rancher/dartboard/tree/20220928_250_pods_per_node_with_cluster_operations) branch.
Note in particular [inputs.tf](../inputs.tf) for the main parameters.
@ -62,7 +62,7 @@ Note in particular [inputs.tf](../inputs.tf) for the main parameters.
- get [Terraform](https://www.terraform.io/downloads)
- check out this project
```shell
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20220928_250_pods_per_node_with_cluster_operations
```

View File

@ -79,8 +79,8 @@ See [the rke2 installation script in this repo](../rke2/install_rke2.sh) for det
## Full configuration details
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221003_300_pods_per_node](https://github.com/moio/scalability-tests/tree/20221003_300_pods_per_node) branch. Note in particular [inputs.tf](../inputs.tf) for the main parameters.
All tests are driven by [Cypress](https://www.cypress.io/) files in the [cypress](https://github.com/moio/scalability-tests/tree/20221003_300_pods_per_node/cypress) directory.
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221003_300_pods_per_node](https://github.com/rancher/dartboard/tree/20221003_300_pods_per_node) branch. Note in particular [inputs.tf](../inputs.tf) for the main parameters.
All tests are driven by [Cypress](https://www.cypress.io/) files in the [cypress](https://github.com/rancher/dartboard/tree/20221003_300_pods_per_node/cypress) directory.
## Reproduction Instructions
@ -102,7 +102,7 @@ All tests are driven by [Cypress](https://www.cypress.io/) files in the [cypress
- clone this project:
```shell
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20221003_300_pods_per_node
```

View File

@ -69,9 +69,9 @@ References:
## Full configuration details
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221128_api_load_benchmarks](https://github.com/moio/scalability-tests/tree/20221128_api_load_benchmarks/terraform) branch. Note in particular [inputs.tf](https://github.com/moio/scalability-tests/blob/20221128_api_load_benchmarks/terraform/inputs.tf) for the main parameters.
Initial configuration is driven by [Cypress](https://www.cypress.io/) files in the [cypress/e2e](https://github.com/moio/scalability-tests/tree/20221128_api_load_benchmarks/cypress/cypress/e2e) directory.
Benchmark Python scripts are available in the [util](https://github.com/moio/scalability-tests/tree/20221128_api_load_benchmarks/util) directory.
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221128_api_load_benchmarks](https://github.com/rancher/dartboard/tree/20221128_api_load_benchmarks/terraform) branch. Note in particular [inputs.tf](https://github.com/rancher/dartboard/blob/20221128_api_load_benchmarks/terraform/inputs.tf) for the main parameters.
Initial configuration is driven by [Cypress](https://www.cypress.io/) files in the [cypress/e2e](https://github.com/rancher/dartboard/tree/20221128_api_load_benchmarks/cypress/cypress/e2e) directory.
Benchmark Python scripts are available in the [util](https://github.com/rancher/dartboard/tree/20221128_api_load_benchmarks/util) directory.
## Reproduction Instructions
@ -92,7 +92,7 @@ Benchmark Python scripts are available in the [util](https://github.com/moio/sca
- clone this project:
```shell
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20221128_api_load_benchmarks
```

View File

@ -45,9 +45,9 @@ References:
## Full configuration details
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221130_can-i_microbenchmark](https://github.com/moio/scalability-tests/tree/20221128_api_load_benchmarks/terraform) branch. Note in particular [inputs.tf](https://github.com/moio/scalability-tests/blob/20221130_can-i_microbenchmark/terraform/inputs.tf) for the main parameters.
Initial configuration is driven by [Cypress](https://www.cypress.io/) files in the [cypress/e2e](https://github.com/moio/scalability-tests/tree/20221130_can-i_microbenchmark/cypress/cypress/e2e) directory.
Benchmark Python scripts are available in the [util](https://github.com/moio/scalability-tests/tree/20221130_can-i_microbenchmark/util) directory.
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221130_can-i_microbenchmark](https://github.com/rancher/dartboard/tree/20221130_can-i_microbenchmark/terraform) branch. Note in particular [inputs.tf](https://github.com/rancher/dartboard/blob/20221130_can-i_microbenchmark/terraform/inputs.tf) for the main parameters.
Initial configuration is driven by [Cypress](https://www.cypress.io/) files in the [cypress/e2e](https://github.com/rancher/dartboard/tree/20221130_can-i_microbenchmark/cypress/cypress/e2e) directory.
Benchmark Python scripts are available in the [util](https://github.com/rancher/dartboard/tree/20221130_can-i_microbenchmark/util) directory.
## Reproduction Instructions
@ -68,7 +68,7 @@ Benchmark Python scripts are available in the [util](https://github.com/moio/sca
- clone this project:
```shell
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20221130_can-i_microbenchmark
```

View File

@ -29,8 +29,8 @@ No significant difference in list performance of small ConfigMaps, up to 256K of
## Full configuration details
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221215_kine_locality_test](https://github.com/moio/scalability-tests/tree/20221215_kine_locality_test/terraform) branch.
Benchmark Python script is available in the [util](https://github.com/moio/scalability-tests/tree/20221215_kine_locality_test/util) directory.
All infrastructure is defined via [Terraform](https://www.terraform.io/) files in the [20221215_kine_locality_test](https://github.com/rancher/dartboard/tree/20221215_kine_locality_test/terraform) branch.
Benchmark Python script is available in the [util](https://github.com/rancher/dartboard/tree/20221215_kine_locality_test/util) directory.
## Reproduction Instructions
@ -50,7 +50,7 @@ Benchmark Python script is available in the [util](https://github.com/moio/scala
- clone this project:
```shell
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20221215_kine_locality_test
```

View File

@ -37,9 +37,9 @@ Under test conditions, according to collected measures described below:
## Full configuration details
All infrastructure is defined in [Terraform](https://www.terraform.io/) files in the [20230306_steve_vai_tests](https://github.com/moio/scalability-tests/tree/20230306_steve_vai_tests/terraform) branch.
All infrastructure is defined in [Terraform](https://www.terraform.io/) files in the [20230306_steve_vai_tests](https://github.com/rancher/dartboard/tree/20230306_steve_vai_tests/terraform) branch.
[k6](https://k6.io) load test scripts are defined in the [k6](https://github.com/moio/scalability-tests/tree/20230306_steve_vai_tests/k6) directory.
[k6](https://k6.io) load test scripts are defined in the [k6](https://github.com/rancher/dartboard/tree/20230306_steve_vai_tests/k6) directory.
## Reproduction Instructions
@ -57,7 +57,7 @@ All infrastructure is defined in [Terraform](https://www.terraform.io/) files in
Deploy the k3d infrastructure and install Rancher:
```shell
# clone this project
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20230306_steve_vai_tests
@ -155,7 +155,7 @@ In the example above, retrieving a page up to 100 resources from the local clust
#### Analysis of results
Full results are available in the [20230306 - steve vai test results](https://github.com/moio/scalability-tests/tree/20230306_steve_vai_tests/docs/20230306%20-%20steve%20vai%20test%20results) directory, summary is in the table below:
Full results are available in the [20230306 - steve vai test results](https://github.com/rancher/dartboard/tree/20230306_steve_vai_tests/docs/20230306%20-%20steve%20vai%20test%20results) directory; a summary is in the table below:
![table showing a summary of results](images/20230306-table.png)
An [Excel file](https://mysuse-my.sharepoint.com/:x:/g/personal/moio_suse_com/ERaeDyfE25xLoQFKiMYa8bgBOb2z24wKNhTp0FVMVumDMA?e=nGOPMy) is available for SUSE employees.

View File

@ -39,9 +39,9 @@ Under test conditions, according to collected measures described below:
## Full configuration details
All infrastructure is defined in [Terraform](https://www.terraform.io/) files in the [20230503_steve_vai_tests_higher_scale](https://github.com/moio/scalability-tests/tree/20230503_steve_vai_tests_higher_scale/terraform) branch.
All infrastructure is defined in [Terraform](https://www.terraform.io/) files in the [20230503_steve_vai_tests_higher_scale](https://github.com/rancher/dartboard/tree/20230503_steve_vai_tests_higher_scale/terraform) branch.
[k6](https://k6.io) load test scripts are defined in the [k6](https://github.com/moio/scalability-tests/tree/20230503_steve_vai_tests_higher_scale/k6) directory.
[k6](https://k6.io) load test scripts are defined in the [k6](https://github.com/rancher/dartboard/tree/20230503_steve_vai_tests_higher_scale/k6) directory.
## Reproduction Instructions
@ -86,7 +86,7 @@ TAG=vai make quickbuild
Deploy the k3d infrastructure, install Rancher, set up clusters for tests, import built images:
```shell
# clone this project
git clone https://github.com/moio/scalability-tests.git
git clone https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
git checkout 20230503_steve_vai_tests_higher_scale
@ -108,7 +108,7 @@ First, we create a given number of ConfigMaps in a test namespace via a k6 scrip
Then, we simulate 10 virtual users listing all ConfigMaps in that namespace via another k6 script. Each user repeats the listing 30 times (for statistical accuracy of the measurements). The page size is 100, as in the current UI. We exercise both the k8s-based pagination implementation, using the `limit`/`continue` parameters and currently used by the [dashboard](https://github.com/rancher/dashboard/) UI, and the new Steve-cache pagination implementation using the `page`/`pagesize` parameters. We test both the local and a downstream cluster. Tests are repeated for the `baseline` and `vai` images.
Details on tests are available in the [bin/run_test.js](https://github.com/moio/scalability-tests/blob/20230503_steve_vai_tests_higher_scale/bin/run_tests.mjs) script source file.
Details on tests are available in the [bin/run_test.js](https://github.com/rancher/dartboard/blob/20230503_steve_vai_tests_higher_scale/bin/run_tests.mjs) script source file.
#### Procedure
@ -133,7 +133,7 @@ Interpreting results: the script will output one `results.csv` file with the fol
* `p(99)` 99th percentile - 99% of requests had a duration less than or equal to this value
* `count` total number of requests
Full results are available in the [results.csv](https://github.com/moio/scalability-tests/tree/20230503_steve_vai_tests_higher_scale/docs/20230503%20-%20steve%20vai%20test%20higher%20scale%20results/results.csv) file, summary is in the table below:
Full results are available in the [results.csv](https://github.com/rancher/dartboard/tree/20230503_steve_vai_tests_higher_scale/docs/20230503%20-%20steve%20vai%20test%20higher%20scale%20results/results.csv) file; a summary is in the table below:
![table showing a summary of results](images/20230503-table.png)
An [Excel file](https://mysuse-my.sharepoint.com/:x:/g/personal/moio_suse_com/ETkus1LxojlBm7aYWdswNX0BmmkfrQt0NET3oO6QujnNgw?e=bexG44) is available for SUSE employees.

View File

@ -75,7 +75,7 @@ The increase in the CPU usage to almost 5 cores in _fig.1_ and 7 cores in _fig.2
Note that, in the absence of resources (users, roles, projects, ConfigMaps, Secrets), the load is minimal and the CPU load varies from ~0.750 to ~0.950 cores. In those conditions, base OS processes running on the nodes have a higher CPU footprint.
You can find more screenshots of the resource usage of these two scenarios in the [100 clusters x 1 node grafana screenshots folder](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/grafana-screenshots/RKE4x) and in the [100 clusters x 3 nodes grafana screenshots folder](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/grafana-screenshots/RKE5x).
You can find more screenshots of the resource usage of these two scenarios in the [100 clusters x 1 node grafana screenshots folder](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/grafana-screenshots/RKE4x) and in the [100 clusters x 3 nodes grafana screenshots folder](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/grafana-screenshots/RKE5x).
### Rancher API response time benchmarks
@ -104,9 +104,9 @@ For more results, check the available data shared in the [available data section
## Full configuration details
All infrastructure is defined in [Terraform](https://www.terraform.io/) files in the [20231222_rke2_100_clusters_1vs3_nodes_comparison](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/terraform) branch.
All infrastructure is defined in [Terraform](https://www.terraform.io/) files in the [20231222_rke2_100_clusters_1vs3_nodes_comparison](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/terraform) branch.
[k6](https://k6.io) load test scripts are defined in the [k6](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/k6) directory.
[k6](https://k6.io) load test scripts are defined in the [k6](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/k6) directory.
## Reproduction Instructions
@ -128,7 +128,7 @@ Log into Azure via the CLI:
Deploy the RKE2 environment, install Rancher, set up clusters for tests:
```shell
# clone this project
git clone -b 20231222_rke2_100_clusters_1vs3_nodes_comparison https://github.com/moio/scalability-tests.git
git clone -b 20231222_rke2_100_clusters_1vs3_nodes_comparison https://github.com/rancher/dartboard.git scalability-tests
cd scalability-tests
export TERRAFORM_WORK_DIR=terraform/main/azure
@ -137,7 +137,7 @@ export TERRAFORM_WORK_DIR=terraform/main/azure
./bin/setup.mjs && ./bin/run_tests.mjs
````
>[!NOTE]
>by default the branch will setup the 100 clusters x 1 node scenario: if you want to run the 100 clusters x 3 nodes one you may want to change the server_count value in the [azure configuration file](https://github.com/moio/scalability-tests/blob/20231222_rke2_100_clusters_1vs3_nodes_comparison/terraform/main/azure/inputs.tf#L28) to 3 before running the /bin/setup.mjs && ./bin/run_tests.mjs command.
>by default the branch will set up the 100 clusters x 1 node scenario: if you want to run the 100 clusters x 3 nodes one, change the server_count value in the [azure configuration file](https://github.com/rancher/dartboard/blob/20231222_rke2_100_clusters_1vs3_nodes_comparison/terraform/main/azure/inputs.tf#L28) to 3 before running `./bin/setup.mjs && ./bin/run_tests.mjs`.
Once the system is provisioned, to get Rancher and clusters access info:
@ -175,9 +175,9 @@ important output data points are:
## Available data
All the data collected from the tests is shared in the [_RKE2 100 clusters 1vs3 nodes comparison_ results directory](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/).
All the data collected from the tests is shared in the [_RKE2 100 clusters 1vs3 nodes comparison_ results directory](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/).
There you will find:
* a list of the deployed scenarios ([_list.txt_ file](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/list.txt))
* the raw data from the single tests ([_tests_raw_data_ folder](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/tests_raw_data))
* screenshots from a Grafana dashboard showing CPU and Memory usage from the tested scenarios ([_grafana-screenshots_ folder](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/grafana-screenshots))
* an [OpenDocument spreadsheet](https://github.com/moio/scalability-tests/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/scalability_tests_summary.ods) containing all the results with few graphs
* a list of the deployed scenarios ([_list.txt_ file](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/list.txt))
* the raw data from the single tests ([_tests_raw_data_ folder](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/tests_raw_data))
* screenshots from a Grafana dashboard showing CPU and Memory usage from the tested scenarios ([_grafana-screenshots_ folder](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/grafana-screenshots))
* an [OpenDocument spreadsheet](https://github.com/rancher/dartboard/tree/20231222_rke2_100_clusters_1vs3_nodes_comparison/docs/20231222%20-%20RKE2%20100%20clusters%201vs3%20nodes%20comparison/scalability_tests_summary.ods) containing all the results with a few graphs

View File

@ -32,6 +32,7 @@ type ChartVariables struct {
RancherMonitoringVersion string `yaml:"rancher_monitoring_version"`
CertManagerVersion string `yaml:"cert_manager_version"`
TesterGrafanaVersion string `yaml:"tester_grafana_version"`
RancherValues string `yaml:"rancher_values"`
}
type TestVariables struct {

View File

@ -25,7 +25,7 @@ import (
"github.com/rancher/dartboard/internal/vendored"
)
func Install(kubecfg, chartLocation, releaseName, namespace string, vals map[string]any) error {
func Install(kubecfg, chartLocation, releaseName, namespace string, vals map[string]any, extraArgs ...string) error {
args := []string{
"--kubeconfig=" + kubecfg,
"upgrade",
@ -46,6 +46,7 @@ func Install(kubecfg, chartLocation, releaseName, namespace string, vals map[str
}
args = append(args, "--set-json="+valueString)
}
args = append(args, extraArgs...)
cmd := vendored.Command("helm", args...)
var errStream strings.Builder

View File

@ -21,12 +21,11 @@ import (
"os"
"strings"
"github.com/rancher/dartboard/internal/tofu"
"github.com/rancher/dartboard/internal/vendored"
)
func ImageImport(cluster tofu.Cluster, image string) error {
args := []string{"image", "import", "--cluster", strings.Replace(cluster.Context, "k3d-", "", -1), image}
func ImageImport(k3dClusterName string, image string) error {
args := []string{"image", "import", "--cluster", k3dClusterName, image}
cmd := vendored.Command("k3d", args...)
var errStream strings.Builder

View File

@ -162,7 +162,7 @@ func K6run(kubeconfig, testPath string, envVars, tags map[string]string, printLo
quotedArgs = append(quotedArgs, "-e", shellescape.Quote(fmt.Sprintf("%s=%s", k, v)))
}
quotedArgs = append(quotedArgs, shellescape.Quote(testPath))
log.Printf("Running equivalent of:\nk6 %s\n", strings.Join(quotedArgs, " "))
log.Printf("Running equivalent of:\n./bin/k6 %s\n", strings.Join(quotedArgs, " "))
// if a kubeconfig is specified, upload it as secret to later mount it
if path, ok := envVars["KUBECONFIG"]; ok {

View File

@ -52,6 +52,7 @@ type Addresses struct {
type Cluster struct {
AppAddresses ClusterAppAddresses `json:"app_addresses"`
Name string `json:"name"`
Context string `json:"context"`
IngressClassName string `json:"ingress_class_name"`
Kubeconfig string `json:"kubeconfig"`

View File

@ -8,6 +8,7 @@ module "network" {
project_name = var.project_name
region = var.region
availability_zone = var.availability_zone
existing_vpc_name = var.existing_vpc_name
bastion_host_ami = length(var.bastion_host_ami) > 0 ? var.bastion_host_ami : null
ssh_bastion_user = var.ssh_bastion_user
ssh_public_key_path = var.ssh_public_key_path

View File

@ -87,6 +87,12 @@ variable "availability_zone" {
default = "us-east-1a"
}
variable "existing_vpc_name" {
description = "Name of existing VPC to use. If null, a new VPC will be created"
type = string
default = null
}
variable "bastion_host_ami" {
description = "AMI ID"
default = "ami-0e55a8b472a265e3f"

View File

@ -1,3 +1,9 @@
provider "k3d" {
fixes = {
"dns" = false
}
}
module "network" {
source = "../../modules/k3d/network"
project_name = var.project_name

View File

@ -7,7 +7,7 @@ terraform {
}
k3d = {
source = "moio/k3d"
version = "0.0.10"
version = "0.0.12"
}
}
}

View File

@ -0,0 +1,51 @@
# Data source to look up existing VPC
data "aws_vpc" "existing" {
count = local.create_vpc ? 0 : 1
filter {
name = "tag:Name"
values = [var.existing_vpc_name]
}
}
data "aws_internet_gateway" "existing" {
count = local.create_vpc ? 0 : 1
filter {
name = "attachment.vpc-id"
values = [local.vpc_id]
}
}
# Data sources to look up existing subnets
data "aws_subnet" "public" {
count = local.create_vpc ? 0 : 1
vpc_id = one(data.aws_vpc.existing[*].id)
availability_zone = var.availability_zone
tags = {
Name = "*public*",
Tier = "Public"
}
}
data "aws_subnet" "private" {
count = local.create_vpc ? 0 : 1
vpc_id = one(data.aws_vpc.existing[*].id)
availability_zone = var.availability_zone
tags = {
Name = "*private*"
Tier = "Private"
}
}
data "aws_subnet" "secondary_private" {
count = local.create_vpc && var.secondary_availability_zone != null ? 0 : 1
vpc_id = one(data.aws_vpc.existing[*].id)
availability_zone = var.secondary_availability_zone
tags = {
Name = "*secondary*private*"
Tier = "SecondaryPrivate"
}
}
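Before deploying against an existing VPC, it can help to confirm that each of these lookups matches exactly one subnet; a sketch with the AWS CLI (the VPC ID is a placeholder; repeat with `Private` and `SecondaryPrivate`):
```shell
aws ec2 describe-subnets \
  --filters "Name=vpc-id,Values=<vpc-id>" "Name=tag:Tier,Values=Public" \
  --query 'Subnets[].{Id:SubnetId,Name:Tags[?Key==`Name`]|[0].Value,AZ:AvailabilityZone}' \
  --output table
```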

View File

@ -1,11 +1,6 @@
/*
This module sets up a class B VPC sliced into three subnets, one public and one or two private.
The public network has an Internet Gateway and accepts SSH connections only.
The private networks have Internet access but do not accept any connections.
A secondary private connection is optional, and is used to support RDS use cases.
*/
# VPC resource created only when existing_vpc_name is null
resource "aws_vpc" "main" {
count = local.create_vpc ? 1 : 0
cidr_block = "172.16.0.0/16"
enable_dns_support = true
enable_dns_hostnames = true
@ -16,7 +11,21 @@ resource "aws_vpc" "main" {
}
}
# Locals coalesce between resources created by this module and existing ones looked up via data sources
locals {
vpc_id = coalesce(one(aws_vpc.main[*].id), one(data.aws_vpc.existing[*].id))
vpc_cidr_block = coalesce(one(aws_vpc.main[*].cidr_block), one(data.aws_vpc.existing[*].cidr_block))
internet_gateway_id = coalesce(one(aws_internet_gateway.main[*].id), one(data.aws_internet_gateway.existing[*].id))
public_subnet_id = coalesce(one(aws_subnet.public[*].id), one(data.aws_subnet.public[*].id))
private_subnet_id = coalesce(one(aws_subnet.private[*].id), one(data.aws_subnet.private[*].id))
secondary_private_subnet_id = coalesce(one(aws_subnet.secondary_private[*].id), one(data.aws_subnet.secondary_private[*].id))
create_vpc = var.existing_vpc_name == null
}
resource "aws_internet_gateway" "main" {
count = local.create_vpc ? 1 : 0
vpc_id = local.vpc_id
tags = {
@ -25,12 +34,8 @@ resource "aws_internet_gateway" "main" {
}
}
locals {
vpc_id = aws_vpc.main.id
vpc_cidr_block = aws_vpc.main.cidr_block
}
resource "aws_eip" "nat_eip" {
tags = {
Project = var.project_name
Name = "${var.project_name}-nat-eip"
@ -39,9 +44,9 @@ resource "aws_eip" "nat_eip" {
resource "aws_nat_gateway" "nat" {
allocation_id = aws_eip.nat_eip.id
subnet_id = aws_subnet.public.id
subnet_id = local.public_subnet_id
depends_on = [aws_internet_gateway.main]
depends_on = [data.aws_internet_gateway.existing, aws_internet_gateway.main]
tags = {
Project = var.project_name
@ -49,17 +54,52 @@ resource "aws_nat_gateway" "nat" {
}
}
resource "aws_route_table" "public" {
vpc_id = local.vpc_id
route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.main.id
}
resource "aws_subnet" "public" {
count = local.create_vpc ? 1 : 0
availability_zone = var.availability_zone
vpc_id = local.vpc_id
cidr_block = "172.16.0.0/24"
map_public_ip_on_launch = true
tags = {
Project = var.project_name
Name = "${var.project_name}-public-route-table"
Name = "${var.project_name}-public-subnet"
}
}
resource "aws_subnet" "private" {
count = local.create_vpc ? 1 : 0
availability_zone = var.availability_zone
vpc_id = local.vpc_id
cidr_block = "172.16.1.0/24"
map_public_ip_on_launch = false
tags = {
Project = var.project_name
Name = "${var.project_name}-private-subnet"
}
}
resource "aws_subnet" "secondary_private" {
count = local.create_vpc && var.secondary_availability_zone != null ? 1 : 0
availability_zone = var.secondary_availability_zone
vpc_id = local.vpc_id
cidr_block = "172.16.2.0/24"
map_public_ip_on_launch = false
tags = {
Project = var.project_name
Name = "${var.project_name}-secondary-private-subnet"
}
}
resource "aws_key_pair" "key_pair" {
key_name = "${var.project_name}-key-pair"
public_key = file(var.ssh_public_key_path)
tags = {
Project = var.project_name
Name = "${var.project_name}-ssh-key-pair"
}
}
@ -68,6 +108,20 @@ resource "aws_main_route_table_association" "vpc_internet" {
route_table_id = aws_route_table.public.id
}
resource "aws_route_table" "public" {
vpc_id = local.vpc_id
route {
cidr_block = "0.0.0.0/0"
gateway_id = local.internet_gateway_id
}
tags = {
Project = var.project_name
Name = "${var.project_name}-public-route-table"
}
}
resource "aws_route_table" "private" {
vpc_id = local.vpc_id
@ -82,53 +136,16 @@ resource "aws_route_table" "private" {
}
}
resource "aws_subnet" "public" {
availability_zone = var.availability_zone
vpc_id = local.vpc_id
cidr_block = "172.16.0.0/24"
map_public_ip_on_launch = true
tags = {
Project = var.project_name
Name = "${var.project_name}-public-subnet"
}
}
resource "aws_route_table_association" "public" {
subnet_id = aws_subnet.public.id
subnet_id = local.public_subnet_id
route_table_id = aws_route_table.public.id
}
resource "aws_subnet" "private" {
availability_zone = var.availability_zone
vpc_id = local.vpc_id
cidr_block = "172.16.1.0/24"
map_public_ip_on_launch = false
tags = {
Project = var.project_name
Name = "${var.project_name}-private-subnet"
}
}
resource "aws_route_table_association" "private" {
subnet_id = aws_subnet.private.id
subnet_id = local.private_subnet_id
route_table_id = aws_route_table.private.id
}
resource "aws_subnet" "secondary_private" {
count = var.secondary_availability_zone != null ? 1 : 0
availability_zone = var.secondary_availability_zone
vpc_id = local.vpc_id
cidr_block = "172.16.2.0/24"
map_public_ip_on_launch = false
tags = {
Project = var.project_name
Name = "${var.project_name}-secondary-private-subnet"
}
}
resource "aws_route_table_association" "secondary_private" {
count = var.secondary_availability_zone != null ? 1 : 0
subnet_id = aws_subnet.secondary_private[0].id
@ -136,6 +153,7 @@ resource "aws_route_table_association" "secondary_private" {
}
resource "aws_vpc_dhcp_options" "dhcp_options" {
count = local.create_vpc ? 1 : 0
domain_name = var.region == "us-east-1" ? "ec2.internal" : "${var.region}.compute.internal"
domain_name_servers = ["AmazonProvidedDNS"]
@ -146,8 +164,9 @@ resource "aws_vpc_dhcp_options" "dhcp_options" {
}
resource "aws_vpc_dhcp_options_association" "vpc_dhcp_options" {
count = local.create_vpc ? 1 : 0
vpc_id = local.vpc_id
dhcp_options_id = aws_vpc_dhcp_options.dhcp_options.id
dhcp_options_id = aws_vpc_dhcp_options.dhcp_options[0].id
}
resource "aws_security_group" "public" {
@ -222,16 +241,7 @@ resource "aws_security_group" "private" {
}
}
resource "aws_key_pair" "key_pair" {
key_name = "${var.project_name}-key-pair"
public_key = file(var.ssh_public_key_path)
tags = {
Project = var.project_name
Name = "${var.project_name}-ssh-key-pair"
}
}
# Bastion host module configuration
module "bastion" {
source = "../node"
project_name = var.project_name
@ -247,8 +257,8 @@ module "bastion" {
}
network_config = {
availability_zone : var.availability_zone,
public_subnet_id : aws_subnet.public.id
private_subnet_id : aws_subnet.private.id
public_subnet_id : local.public_subnet_id
private_subnet_id : local.private_subnet_id
secondary_private_subnet_id : var.secondary_availability_zone != null ? aws_subnet.secondary_private[0].id : null
public_security_group_id : aws_security_group.public.id
private_security_group_id : aws_security_group.private.id

View File

@ -1,9 +1,9 @@
output "config" {
value = {
availability_zone : var.availability_zone,
public_subnet_id : aws_subnet.public.id,
private_subnet_id : aws_subnet.private.id,
secondary_private_subnet_id : var.secondary_availability_zone != null ? aws_subnet.secondary_private[0].id : null,
public_subnet_id : local.public_subnet_id,
private_subnet_id : local.private_subnet_id,
secondary_private_subnet_id : var.secondary_availability_zone != null ? local.secondary_private_subnet_id : null,
public_security_group_id : aws_security_group.public.id,
private_security_group_id : aws_security_group.private.id,
ssh_key_name : aws_key_pair.key_pair.key_name,

View File

@ -45,3 +45,10 @@ variable "bastion_host_instance_type" {
description = "EC2 instance type"
default = "t4g.small"
}
# Variables for existing VPC configuration
variable "existing_vpc_name" {
description = "Name of existing VPC to use. If null, a new VPC will be created"
type = string
default = null
}

View File

@ -15,11 +15,13 @@ variable "distro_version" {
variable "server_count" {
description = "Number of server nodes in this cluster"
type = number
default = 1
}
variable "agent_count" {
description = "Number of agent nodes in this cluster"
type = number
default = 0
}
@ -41,16 +43,19 @@ variable "ssh_user" {
variable "local_kubernetes_api_port" {
description = "Local port this cluster's Kubernetes API will be published to (via SSH tunnel)"
type = number
default = 6445
}
variable "tunnel_app_http_port" {
description = "Local port this cluster's http endpoints will be published to (via SSH tunnel)"
type = number
default = 8080
}
variable "tunnel_app_https_port" {
description = "Local port this cluster's https endpoints will be published to (via SSH tunnel)"
type = number
default = 8443
}
@ -62,11 +67,13 @@ variable "sans" {
variable "max_pods" {
description = "Maximum number of pods per node"
type = number
default = 110
}
variable "node_cidr_mask_size" {
description = "Size of the CIDR mask for nodes. Increase when increasing max_pods so that 2^(32-node_cidr_max_size) > 2 * max_pods"
type = number
default = 24
}

View File

@ -16,11 +16,13 @@ variable "distro_version" {
variable "server_count" {
description = "Number of server nodes in this cluster"
type = number
default = 1
}
variable "agent_count" {
description = "Number of agent nodes in this cluster"
type = number
default = 0
}
@ -42,16 +44,19 @@ variable "ssh_user" {
variable "local_kubernetes_api_port" {
description = "Port this cluster's Kubernetes API will be published to (for inclusion in kubeconfig)"
type = number
default = 6443
}
variable "tunnel_app_http_port" {
description = "Local port this cluster's http endpoints will be published to (via SSH tunnel)"
type = number
default = 8080
}
variable "tunnel_app_https_port" {
description = "Local port this cluster's https endpoints will be published to (via SSH tunnel)"
type = number
default = 8443
}
@ -63,11 +68,13 @@ variable "sans" {
variable "max_pods" {
description = "Maximum number of pods per node"
type = number
default = 110
}
variable "node_cidr_mask_size" {
description = "Size of the CIDR mask for nodes. Increase when increasing max_pods so that 2^(32-node_cidr_max_size) > 2 * max_pods"
type = number
default = 24
}

View File

@ -98,15 +98,18 @@ variable "project_name" {
variable "first_kubernetes_api_port" {
description = "Port number where the Kubernetes API of the first cluster is published locally. Other clusters' ports are published in successive ports"
type = number
default = 7445
}
variable "first_app_http_port" {
description = "Port number where the first server's port 80 is published locally. Other clusters' ports are published in successive ports"
type = number
default = 9080
}
variable "first_app_https_port" {
description = "Port number where the first server's port 443 is published locally. Other clusters' ports are published in successive ports"
type = number
default = 9443
}

View File

@ -325,6 +325,7 @@ resource "k3d_cluster" "cluster" {
locals {
local_kubernetes_api_url = nonsensitive(k3d_cluster.cluster[0].credentials[0].host)
k3d_cluster_name = "${var.project_name}-${var.name}"
}
resource "local_file" "kubeconfig" {
@ -337,19 +338,19 @@ resource "local_file" "kubeconfig" {
certificate-authority-data = base64encode(k3d_cluster.cluster[0].credentials[0].cluster_ca_certificate)
server = local.local_kubernetes_api_url
}
name = "k3d-${var.project_name}-${var.name}"
name = "k3d-${local.k3d_cluster_name}"
}
]
contexts = [
{
context = {
cluster = "k3d-${var.project_name}-${var.name}"
user : "admin@k3d-${var.project_name}-${var.name}"
cluster = "k3d-${local.k3d_cluster_name}"
user : "admin@k3d-${local.k3d_cluster_name}"
}
name = "k3d-${var.project_name}-${var.name}"
name = "k3d-${local.k3d_cluster_name}"
}
]
current-context = "k3d-${var.project_name}-${var.name}"
current-context = "k3d-${local.k3d_cluster_name}"
kind = "Config"
preferences = {}
users = [
@ -358,7 +359,7 @@ resource "local_file" "kubeconfig" {
client-certificate-data : base64encode(k3d_cluster.cluster[0].credentials[0].client_certificate)
client-key-data : base64encode(k3d_cluster.cluster[0].credentials[0].client_key)
}
name : "admin@k3d-${var.project_name}-${var.name}"
name : "admin@k3d-${local.k3d_cluster_name}"
}
]
})

View File

@ -1,14 +1,15 @@
output "config" {
value = {
kubeconfig = var.server_count > 0 ? abspath(local_file.kubeconfig[0].filename) : null
context = var.name
context = "k3d-${local.k3d_cluster_name}"
name = local.k3d_cluster_name
// addresses of the Kubernetes API server
kubernetes_addresses = {
// resolvable over the Internet
public = null
// resolvable from the network this cluster runs in
private = "k3d-${var.project_name}-${var.name}-server-0"
private = "https://k3d-${var.project_name}-${var.name}-server-0:6443"
// resolvable from the host running OpenTofu
tunnel = local.local_kubernetes_api_url
}