mirror of https://github.com/etcd-io/dbtester.git
Merge pull request #254 from gyuho/control
control: collect system metrics in client side
commit c1f8337339
The same four keys are added to every benchmark configuration YAML file touched by this commit:

@@ -18,6 +18,10 @@ data_latency_distribution_all: data-latency-distribution-all.csv
 data_latency_throughput_timeseries: data-latency-throughput-timeseries.csv
 data_latency_by_key_number: data-latency-by-key-number.csv
 
+# client machine's system metrics
+client_system_metrics: client-system-metrics.csv
+client_system_metrics_interpolated: client-system-metrics-interpolated.csv
+
 # start database by sending RPC calls to agents
 step1:
   skip_start_database: false
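These two keys name where the benchmark client writes its own machine's metrics: client_system_metrics is the raw per-second CSV and client_system_metrics_interpolated is the interpolated version produced at the end of the run (see the psn.CSV.Interpolate call in the Go changes below). As a rough sketch of how keys like these decode into Go struct fields, assuming gopkg.in/yaml.v2 and a hypothetical trimmed-down struct (the commit's real Config struct carries many more fields), the mapping looks like:

package main

import (
	"fmt"
	"io/ioutil"

	yaml "gopkg.in/yaml.v2"
)

// metricsPaths mirrors only the two new keys; a hypothetical stand-in for
// the full Config struct defined later in this commit.
type metricsPaths struct {
	ClientSystemMetrics             string `yaml:"client_system_metrics"`
	ClientSystemMetricsInterpolated string `yaml:"client_system_metrics_interpolated"`
}

func main() {
	bts, err := ioutil.ReadFile("config.yaml") // hypothetical config path
	if err != nil {
		panic(err)
	}
	var p metricsPaths
	if err := yaml.Unmarshal(bts, &p); err != nil {
		panic(err)
	}
	fmt.Println(p.ClientSystemMetrics, p.ClientSystemMetricsInterpolated)
}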
@@ -17,10 +17,13 @@ package control

import (
	"fmt"
	"io/ioutil"
	"os"
	"strings"
	"time"

	"github.com/coreos/dbtester/pkg/ntp"
	"github.com/coreos/etcd/pkg/netutil"
	"github.com/gyuho/psn"
	"github.com/spf13/cobra"
)
@@ -32,9 +35,26 @@ var Command = &cobra.Command{
}

var configPath string
var diskDevice string
var networkInterface string

func init() {
	dn, err := psn.GetDevice("/")
	if err != nil {
		plog.Warningf("cannot get disk device mounted at '/' (%v)", err)
	}
	nm, err := netutil.GetDefaultInterfaces()
	if err != nil {
		plog.Warningf("cannot detect default network interface (%v)", err)
	}
	var nt string
	for k := range nm {
		nt = k
		break
	}
	Command.PersistentFlags().StringVarP(&configPath, "config", "c", "", "YAML configuration file path.")
	Command.PersistentFlags().StringVar(&diskDevice, "disk-device", dn, "Disk device to collect disk statistics metrics from.")
	Command.PersistentFlags().StringVar(&networkInterface, "network-interface", nt, "Network interface to record in/outgoing packets.")
}

func commandFunc(cmd *cobra.Command, args []string) error {
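The disk device and network interface default to whatever psn.GetDevice("/") and netutil.GetDefaultInterfaces detect at startup, so the new flags only need to be passed when those guesses are wrong. A hypothetical invocation (the binary and subcommand name, device, and interface below are placeholders, not taken from this diff) might look like:

	dbtester control --config ./config.yaml --disk-device xvda --network-interface eth0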
@@ -52,6 +72,7 @@ func commandFunc(cmd *cobra.Command, args []string) error {
	default:
		return fmt.Errorf("%q is not supported", cfg.Database)
	}

	if !cfg.Step2.SkipStressDatabase {
		switch cfg.Step2.BenchType {
		case "write":
@@ -70,6 +91,66 @@ func commandFunc(cmd *cobra.Command, args []string) error {
		cfg.Step4.GoogleCloudStorageKey = string(bts)
	}

	pid := int64(os.Getpid())
	plog.Infof("starting collecting system metrics at %q [disk device: %q | network interface: %q | PID: %d]", cfg.ClientSystemMetrics, diskDevice, networkInterface, pid)
	if err = os.RemoveAll(cfg.ClientSystemMetrics); err != nil {
		return err
	}
	tcfg := &psn.TopConfig{
		Exec:           psn.DefaultTopPath,
		IntervalSecond: 1,
		PID:            pid,
	}
	var metricsCSV *psn.CSV
	metricsCSV, err = psn.NewCSV(
		cfg.ClientSystemMetrics,
		pid,
		diskDevice,
		networkInterface,
		"",
		tcfg,
	)
	if err = metricsCSV.Add(); err != nil {
		return err
	}

	donec, sysdonec := make(chan struct{}), make(chan struct{})
	go func() {
		for {
			select {
			case <-time.After(time.Second):
				if err := metricsCSV.Add(); err != nil {
					plog.Errorf("psn.CSV.Add error (%v)", err)
					continue
				}

			case <-donec:
				plog.Infof("finishing collecting system metrics; saving CSV at %q", cfg.ClientSystemMetrics)

				if err := metricsCSV.Save(); err != nil {
					plog.Errorf("psn.CSV.Save(%q) error %v", metricsCSV.FilePath, err)
				} else {
					plog.Infof("CSV saved at %q", metricsCSV.FilePath)
				}

				interpolated, err := metricsCSV.Interpolate()
				if err != nil {
					plog.Fatalf("psn.CSV.Interpolate(%q) failed with %v", metricsCSV.FilePath, err)
				}
				interpolated.FilePath = cfg.ClientSystemMetricsInterpolated
				if err := interpolated.Save(); err != nil {
					plog.Errorf("psn.CSV.Save(%q) error %v", interpolated.FilePath, err)
				} else {
					plog.Infof("CSV saved at %q", interpolated.FilePath)
				}

				close(sysdonec)
				plog.Infof("finished collecting system metrics")
				return
			}
		}
	}()

	// protoc sorts the 'repeated' type data
	// encode in string to enforce ordering of IPs
	cfg.PeerIPString = strings.Join(cfg.PeerIPs, "___")
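The goroutine above samples system metrics once per second until the benchmark steps finish, then writes both the raw and interpolated CSVs and closes a second channel so the main flow can wait for the files to be flushed before uploading them (see the close(donec) / <-sysdonec pair further down). A stripped-down sketch of that start/stop pattern, using only the standard library with stand-in sample and save functions, would be:

package main

import (
	"fmt"
	"time"
)

func main() {
	donec, sysdonec := make(chan struct{}), make(chan struct{})

	sample := func() error { fmt.Println("sample"); return nil } // stand-in for psn.CSV.Add
	save := func() error { fmt.Println("save"); return nil }     // stand-in for psn.CSV.Save / Interpolate

	go func() {
		for {
			select {
			case <-time.After(time.Second):
				if err := sample(); err != nil {
					continue // keep collecting even if one sample fails
				}
			case <-donec:
				if err := save(); err != nil {
					fmt.Println("save error:", err)
				}
				close(sysdonec) // tell the caller the CSVs are on disk
				return
			}
		}
	}()

	time.Sleep(3 * time.Second) // stand-in for the actual benchmark steps
	close(donec)                // stop sampling
	<-sysdonec                  // wait until metrics are saved before uploading
}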
@@ -82,12 +163,12 @@ func commandFunc(cmd *cobra.Command, args []string) error {
 		cfg.DatabaseEndpoints[i] = fmt.Sprintf("%s:%d", cfg.PeerIPs[i], cfg.DatabasePort)
 	}
 
-	no, nerr := ntp.DefaultSync()
-	plog.Infof("npt update output: %q", no)
-	plog.Infof("npt update error: %v", nerr)
-
 	println()
 	if !cfg.Step1.SkipStartDatabase {
+		no, nerr := ntp.DefaultSync()
+		plog.Infof("npt update output: %q", no)
+		plog.Infof("npt update error: %v", nerr)
+
 		plog.Info("step 1: starting databases...")
 		if err = step1StartDatabase(cfg); err != nil {
 			return err
@@ -117,6 +198,9 @@ func commandFunc(cmd *cobra.Command, args []string) error {
	time.Sleep(time.Second)
	saveDatasizeSummary(cfg, idxToResponse)

	close(donec)
	<-sysdonec

	if cfg.Step4.UploadLogs {
		println()
		time.Sleep(3 * time.Second)
@@ -43,6 +43,8 @@ type Config struct {
 	DataLatencyDistributionAll      string `yaml:"data_latency_distribution_all"`
 	DataLatencyThroughputTimeseries string `yaml:"data_latency_throughput_timeseries"`
 	DataLatencyByKeyNumber          string `yaml:"data_latency_by_key_number"`
+	ClientSystemMetrics             string `yaml:"client_system_metrics"`
+	ClientSystemMetricsInterpolated string `yaml:"client_system_metrics_interpolated"`
 
 	// https://zookeeper.apache.org/doc/trunk/zookeeperAdmin.html
 	Step1 struct {
@@ -61,6 +61,12 @@ func TestReadConfig(t *testing.T) {
 	if c.DataLatencyByKeyNumber != "data-latency-by-key-number.csv" {
 		t.Fatalf("unexpected %s", c.DataLatencyByKeyNumber)
 	}
+	if c.ClientSystemMetrics != "client-system-metrics.csv" {
+		t.Fatalf("unexpected %s", c.ClientSystemMetrics)
+	}
+	if c.ClientSystemMetricsInterpolated != "client-system-metrics-interpolated.csv" {
+		t.Fatalf("unexpected %s", c.ClientSystemMetricsInterpolated)
+	}
 
 	if c.Step1.SkipStartDatabase {
 		t.Fatalf("unexpected %v", c.Step1.SkipStartDatabase)
@@ -128,7 +134,7 @@ func TestReadConfig(t *testing.T) {
 	if c.Step4.GoogleCloudStorageBucketName != "dbtester-results" {
 		t.Fatalf("unexpected %s", c.Step4.GoogleCloudStorageBucketName)
 	}
-	if c.Step4.GoogleCloudStorageSubDirectory != "2016041501" {
+	if c.Step4.GoogleCloudStorageSubDirectory != "2017Q1-02-etcd-zookeeper-consul/01-write-1M-keys-client-variable" {
 		t.Fatalf("unexpected %s", c.Step4.GoogleCloudStorageSubDirectory)
 	}
 }
@@ -17,6 +17,10 @@ data_latency_distribution_all: data-latency-distribution-all.csv
 data_latency_throughput_timeseries: data-latency-throughput-timeseries.csv
 data_latency_by_key_number: data-latency-by-key-number.csv
 
+# client machine's system metrics
+client_system_metrics: client-system-metrics.csv
+client_system_metrics_interpolated: client-system-metrics-interpolated.csv
+
 # start database by sending RPC calls to agents
 step1:
   skip_start_database: false

@@ -49,4 +53,4 @@ step4:
   google_cloud_project_name: etcd-development
   google_cloud_storage_key_path: $HOME/gcloud-key.json
   google_cloud_storage_bucket_name: dbtester-results
-  google_cloud_storage_sub_directory: 2016041501
+  google_cloud_storage_sub_directory: 2017Q1-02-etcd-zookeeper-consul/01-write-1M-keys-client-variable
@@ -47,6 +47,12 @@ func step4UploadLogs(cfg Config) error {
 	if err := uploadToGoogle(cfg.DataLatencyByKeyNumber, cfg); err != nil {
 		return err
 	}
+	if err := uploadToGoogle(cfg.ClientSystemMetrics, cfg); err != nil {
+		return err
+	}
+	if err := uploadToGoogle(cfg.ClientSystemMetricsInterpolated, cfg); err != nil {
+		return err
+	}
 	return nil
 }
 
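uploadToGoogle is dbtester's existing helper; its body is not part of this hunk, which only adds two more calls for the new CSV files. For a concrete picture of what such an upload involves, here is a minimal, hypothetical sketch using cloud.google.com/go/storage (not necessarily how dbtester implements it). The bucket and sub-directory values are taken from the sample config above; the object-path layout is an assumption:

package main

import (
	"context"
	"io"
	"log"
	"os"

	"cloud.google.com/go/storage"
)

// uploadCSV copies a local file into the given bucket under objectName.
// Credentials come from Application Default Credentials; names are placeholders.
func uploadCSV(ctx context.Context, bucket, objectName, localPath string) error {
	client, err := storage.NewClient(ctx)
	if err != nil {
		return err
	}
	defer client.Close()

	f, err := os.Open(localPath)
	if err != nil {
		return err
	}
	defer f.Close()

	w := client.Bucket(bucket).Object(objectName).NewWriter(ctx)
	if _, err := io.Copy(w, f); err != nil {
		w.Close()
		return err
	}
	return w.Close() // the upload is finalized on Close
}

func main() {
	if err := uploadCSV(context.Background(),
		"dbtester-results",
		"2017Q1-02-etcd-zookeeper-consul/01-write-1M-keys-client-variable/client-system-metrics.csv",
		"client-system-metrics.csv",
	); err != nil {
		log.Fatal(err)
	}
}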