Merge pull request #254 from gyuho/control

control: collect system metrics in client side
Gyu-Ho Lee, 2017-02-04 03:13:51 -08:00 (committed by GitHub)
commit c1f8337339
17 changed files with 156 additions and 6 deletions

View File

@@ -18,6 +18,10 @@ data_latency_distribution_all: data-latency-distribution-all.csv
data_latency_throughput_timeseries: data-latency-throughput-timeseries.csv
data_latency_by_key_number: data-latency-by-key-number.csv
# client machine's system metrics
client_system_metrics: client-system-metrics.csv
client_system_metrics_interpolated: client-system-metrics-interpolated.csv
# start database by sending RPC calls to agents
step1:
skip_start_database: false
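The same client_system_metrics keys are added to each of the benchmark configuration files changed by this commit. The two keys name the CSV files that the control client writes its own system metrics to, raw and interpolated. Below is a minimal sketch of how such keys could be decoded, assuming the gopkg.in/yaml.v2 package, a hypothetical config.yaml path, and a struct trimmed to just these two fields; the commit's full Config struct appears in a later diff.

// A minimal sketch, not the dbtester code itself: decode only the two new
// keys; gopkg.in/yaml.v2 and the config.yaml path are assumptions here.
package main

import (
	"fmt"
	"io/ioutil"

	yaml "gopkg.in/yaml.v2"
)

type metricsPaths struct {
	ClientSystemMetrics             string `yaml:"client_system_metrics"`
	ClientSystemMetricsInterpolated string `yaml:"client_system_metrics_interpolated"`
}

func main() {
	bts, err := ioutil.ReadFile("config.yaml") // hypothetical path
	if err != nil {
		panic(err)
	}
	var p metricsPaths
	if err := yaml.Unmarshal(bts, &p); err != nil {
		panic(err)
	}
	fmt.Println(p.ClientSystemMetrics, p.ClientSystemMetricsInterpolated)
}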

View File

@@ -17,10 +17,13 @@ package control
import (
"fmt"
"io/ioutil"
"os"
"strings"
"time"
"github.com/coreos/dbtester/pkg/ntp"
"github.com/coreos/etcd/pkg/netutil"
"github.com/gyuho/psn"
"github.com/spf13/cobra"
)
@@ -32,9 +35,26 @@ var Command = &cobra.Command{
}
var configPath string
var diskDevice string
var networkInterface string
func init() {
dn, err := psn.GetDevice("/")
if err != nil {
plog.Warningf("cannot get disk device mounted at '/' (%v)", err)
}
nm, err := netutil.GetDefaultInterfaces()
if err != nil {
plog.Warningf("cannot detect default network interface (%v)", err)
}
var nt string
for k := range nm {
nt = k
break
}
Command.PersistentFlags().StringVarP(&configPath, "config", "c", "", "YAML configuration file path.")
Command.PersistentFlags().StringVar(&diskDevice, "disk-device", dn, "Disk device to collect disk statistics metrics from.")
Command.PersistentFlags().StringVar(&networkInterface, "network-interface", nt, "Network interface to record in/outgoing packets.")
}
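Both detections are best-effort: on failure the code only logs a warning and the default stays empty, so the --disk-device and --network-interface flags double as manual overrides on machines where auto-detection comes up empty. Note also that netutil.GetDefaultInterfaces returns a map, so when several default interfaces exist the range loop above picks an arbitrary one.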
func commandFunc(cmd *cobra.Command, args []string) error {
@@ -52,6 +72,7 @@ func commandFunc(cmd *cobra.Command, args []string) error {
default:
return fmt.Errorf("%q is not supported", cfg.Database)
}
if !cfg.Step2.SkipStressDatabase {
switch cfg.Step2.BenchType {
case "write":
@@ -70,6 +91,66 @@ func commandFunc(cmd *cobra.Command, args []string) error {
cfg.Step4.GoogleCloudStorageKey = string(bts)
}
pid := int64(os.Getpid())
plog.Infof("starting collecting system metrics at %q [disk device: %q | network interface: %q | PID: %d]", cfg.ClientSystemMetrics, diskDevice, networkInterface, pid)
if err = os.RemoveAll(cfg.ClientSystemMetrics); err != nil {
return err
}
tcfg := &psn.TopConfig{
Exec: psn.DefaultTopPath,
IntervalSecond: 1,
PID: pid,
}
var metricsCSV *psn.CSV
metricsCSV, err = psn.NewCSV(
cfg.ClientSystemMetrics,
pid,
diskDevice,
networkInterface,
"",
tcfg,
	)
	if err != nil {
		return err
	}
	if err = metricsCSV.Add(); err != nil {
return err
}
donec, sysdonec := make(chan struct{}), make(chan struct{})
go func() {
for {
select {
case <-time.After(time.Second):
if err := metricsCSV.Add(); err != nil {
plog.Errorf("psn.CSV.Add error (%v)", err)
continue
}
case <-donec:
plog.Infof("finishing collecting system metrics; saving CSV at %q", cfg.ClientSystemMetrics)
if err := metricsCSV.Save(); err != nil {
plog.Errorf("psn.CSV.Save(%q) error %v", metricsCSV.FilePath, err)
} else {
plog.Infof("CSV saved at %q", metricsCSV.FilePath)
}
interpolated, err := metricsCSV.Interpolate()
if err != nil {
plog.Fatalf("psn.CSV.Interpolate(%q) failed with %v", metricsCSV.FilePath, err)
}
interpolated.FilePath = cfg.ClientSystemMetricsInterpolated
if err := interpolated.Save(); err != nil {
plog.Errorf("psn.CSV.Save(%q) error %v", interpolated.FilePath, err)
} else {
plog.Infof("CSV saved at %q", interpolated.FilePath)
}
close(sysdonec)
plog.Infof("finished collecting system metrics")
return
}
}
}()
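The two channels form a stop-and-flush handshake: closing donec tells the sampler goroutine to stop and persist both CSVs, and the sampler closes sysdonec once they are on disk, which is why the main flow below can block on <-sysdonec right after close(donec). A minimal, self-contained sketch of that pattern, with the psn calls replaced by hypothetical sample and flush functions:

// An illustration of the handshake above, not the dbtester implementation;
// sample and flush are stand-ins for psn.CSV.Add and psn.CSV.Save.
package main

import (
	"fmt"
	"time"
)

func collect(sample func() error, flush func(), donec, sysdonec chan struct{}) {
	for {
		select {
		case <-time.After(time.Second): // take one sample per second
			if err := sample(); err != nil {
				continue // a failed sample is logged upstream, not fatal
			}
		case <-donec: // run is over: persist everything, then acknowledge
			flush()
			close(sysdonec)
			return
		}
	}
}

func main() {
	donec, sysdonec := make(chan struct{}), make(chan struct{})
	go collect(func() error { return nil }, func() { fmt.Println("flushed") }, donec, sysdonec)
	time.Sleep(3 * time.Second) // stand-in for the benchmark steps
	close(donec)                // ask the collector to stop
	<-sysdonec                  // block until the CSVs are written
}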
// protoc sorts the 'repeated' type data
// encode in string to enforce ordering of IPs
cfg.PeerIPString = strings.Join(cfg.PeerIPs, "___")
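A tiny illustration of the round trip, with hypothetical addresses: the joined value passes through the RPC as a single string field, so the order is pinned, and the receiving side splits it back apart.

package main

import (
	"fmt"
	"strings"
)

func main() {
	peerIPs := []string{"10.0.0.2", "10.0.0.1", "10.0.0.3"} // hypothetical addresses
	joined := strings.Join(peerIPs, "___")                  // one string: order is fixed
	fmt.Println(joined)
	fmt.Println(strings.Split(joined, "___")) // receiver recovers the original order
}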
@@ -82,12 +163,12 @@ func commandFunc(cmd *cobra.Command, args []string) error {
cfg.DatabaseEndpoints[i] = fmt.Sprintf("%s:%d", cfg.PeerIPs[i], cfg.DatabasePort)
}
no, nerr := ntp.DefaultSync()
plog.Infof("npt update output: %q", no)
plog.Infof("npt update error: %v", nerr)
println()
if !cfg.Step1.SkipStartDatabase {
no, nerr := ntp.DefaultSync()
plog.Infof("npt update output: %q", no)
plog.Infof("npt update error: %v", nerr)
plog.Info("step 1: starting databases...")
if err = step1StartDatabase(cfg); err != nil {
return err
@@ -117,6 +198,9 @@ func commandFunc(cmd *cobra.Command, args []string) error {
time.Sleep(time.Second)
saveDatasizeSummary(cfg, idxToResponse)
close(donec)
<-sysdonec
if cfg.Step4.UploadLogs {
println()
time.Sleep(3 * time.Second)

View File

@@ -43,6 +43,8 @@ type Config struct {
DataLatencyDistributionAll string `yaml:"data_latency_distribution_all"`
DataLatencyThroughputTimeseries string `yaml:"data_latency_throughput_timeseries"`
DataLatencyByKeyNumber string `yaml:"data_latency_by_key_number"`
ClientSystemMetrics string `yaml:"client_system_metrics"`
ClientSystemMetricsInterpolated string `yaml:"client_system_metrics_interpolated"`
// https://zookeeper.apache.org/doc/trunk/zookeeperAdmin.html
Step1 struct {

View File

@@ -61,6 +61,12 @@ func TestReadConfig(t *testing.T) {
if c.DataLatencyByKeyNumber != "data-latency-by-key-number.csv" {
t.Fatalf("unexpected %s", c.DataLatencyByKeyNumber)
}
if c.ClientSystemMetrics != "client-system-metrics.csv" {
t.Fatalf("unexpected %s", c.ClientSystemMetrics)
}
if c.ClientSystemMetricsInterpolated != "client-system-metrics-interpolated.csv" {
t.Fatalf("unexpected %s", c.ClientSystemMetricsInterpolated)
}
if c.Step1.SkipStartDatabase {
t.Fatalf("unexpected %v", c.Step1.SkipStartDatabase)
@@ -128,7 +134,7 @@ func TestReadConfig(t *testing.T) {
if c.Step4.GoogleCloudStorageBucketName != "dbtester-results" {
t.Fatalf("unexpected %s", c.Step4.GoogleCloudStorageBucketName)
}
if c.Step4.GoogleCloudStorageSubDirectory != "2016041501" {
if c.Step4.GoogleCloudStorageSubDirectory != "2017Q1-02-etcd-zookeeper-consul/01-write-1M-keys-client-variable" {
t.Fatalf("unexpected %s", c.Step4.GoogleCloudStorageSubDirectory)
}
}

View File

@@ -17,6 +17,10 @@ data_latency_distribution_all: data-latency-distribution-all.csv
data_latency_throughput_timeseries: data-latency-throughput-timeseries.csv
data_latency_by_key_number: data-latency-by-key-number.csv
# client machine's system metrics
client_system_metrics: client-system-metrics.csv
client_system_metrics_interpolated: client-system-metrics-interpolated.csv
# start database by sending RPC calls to agents
step1:
skip_start_database: false
@@ -49,4 +53,4 @@ step4:
google_cloud_project_name: etcd-development
google_cloud_storage_key_path: $HOME/gcloud-key.json
google_cloud_storage_bucket_name: dbtester-results
google_cloud_storage_sub_directory: 2016041501
google_cloud_storage_sub_directory: 2017Q1-02-etcd-zookeeper-consul/01-write-1M-keys-client-variable

View File

@@ -47,6 +47,12 @@ func step4UploadLogs(cfg Config) error {
if err := uploadToGoogle(cfg.DataLatencyByKeyNumber, cfg); err != nil {
return err
}
if err := uploadToGoogle(cfg.ClientSystemMetrics, cfg); err != nil {
return err
}
if err := uploadToGoogle(cfg.ClientSystemMetricsInterpolated, cfg); err != nil {
return err
}
return nil
}