mirror of https://github.com/rancher/dartboard.git
324 lines
13 KiB
Go
324 lines
13 KiB
Go
package helpers
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/git-ival/dartboard/test/utils/grafanautils"
|
|
"github.com/git-ival/dartboard/test/utils/imageutils"
|
|
"github.com/git-ival/dartboard/test/utils/ranchermonitoring"
|
|
"github.com/git-ival/dartboard/test/utils/rancherprofiling"
|
|
gapi "github.com/grafana/grafana-api-golang-client"
|
|
provV1 "github.com/rancher/rancher/pkg/apis/provisioning.cattle.io/v1"
|
|
"github.com/rancher/shepherd/clients/rancher"
|
|
mgmtV3 "github.com/rancher/shepherd/clients/rancher/generated/management/v3"
|
|
"github.com/rancher/shepherd/extensions/clusters"
|
|
"github.com/rancher/shepherd/extensions/kubeconfig"
|
|
"github.com/rancher/shepherd/extensions/kubectl"
|
|
"github.com/rancher/shepherd/pkg/session"
|
|
"github.com/sirupsen/logrus"
|
|
log "github.com/sirupsen/logrus"
|
|
"github.com/stretchr/testify/require"
|
|
corev1 "k8s.io/api/core/v1"
|
|
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/runtime/schema"
|
|
restclient "k8s.io/client-go/rest"
|
|
"k8s.io/client-go/tools/clientcmd"
|
|
"sigs.k8s.io/yaml"
|
|
)
|
|
|
|
func ConfigMapGVR() schema.GroupVersionResource {
|
|
return corev1.SchemeGroupVersion.WithResource("configmaps")
|
|
}
|
|
|
|
func V1ClusterGVR() schema.GroupVersionResource {
|
|
return provV1.SchemeGroupVersion.WithResource("clusters")
|
|
}
|
|
|
|
// ScreenshotParams bundles the settings that describe one page capture.
type ScreenshotParams struct {
	// URL is the address of the page to capture.
	URL string
	// ImageFilePath is the destination path of the captured artifact.
	ImageFilePath string
	// WindowSize holds two browser window dimensions.
	// NOTE(review): presumably {width, height} in pixels; confirm against imageutils.
	WindowSize [2]int
	// Selector optionally names the page element to capture.
	Selector string
	// Timeout bounds the capture.
	// NOTE(review): unit is not visible in this file (presumably seconds); confirm.
	Timeout int
	// Cookies lists cookies to send with the request.
	Cookies []string
}
|
|
|
|
// endpoints for the different pprof visualizations
|
|
// selectors have been commented out as they can sometimes fail to be found by chromedp
|
|
func PprofEndpoints() map[string]ScreenshotParams {
|
|
return map[string]ScreenshotParams{
|
|
"graph": {
|
|
URL: "http://" + rancherprofiling.BasePprofAddress + "/ui/",
|
|
// Selector: "div#graph",
|
|
},
|
|
"top": {
|
|
URL: "http://" + rancherprofiling.BasePprofAddress + "/ui/top",
|
|
// Selector: "table#toptable",
|
|
},
|
|
"flame": { // Not able to programmatically retrieve screenshot of this via browser, have to resort to html
|
|
URL: "http://" + rancherprofiling.BasePprofAddress + "/ui/flamegraph",
|
|
// Selector: "div#stack-chart",
|
|
},
|
|
"peek": {
|
|
URL: "http://" + rancherprofiling.BasePprofAddress + "/ui/peek",
|
|
// Selector: "div#content",
|
|
},
|
|
"source": {
|
|
URL: "http://" + rancherprofiling.BasePprofAddress + "/ui/source",
|
|
// Selector: "div#content",
|
|
},
|
|
}
|
|
}
|
|
|
|
func GetAllRancherLogs(r *rancher.Client, clusterID string, podName string, since metav1.Time) (string, error) {
|
|
podLogOptions := &corev1.PodLogOptions{
|
|
Container: "rancher",
|
|
Timestamps: true,
|
|
SinceTime: &since,
|
|
}
|
|
log.Infof("Collecting Rancher logs since: %s", since.String())
|
|
return kubeconfig.GetPodLogsWithOpts(r, clusterID, podName, "cattle-system", "", podLogOptions)
|
|
}
|
|
|
|
func CreateCustomMonitoringDashboards(t *testing.T, ts *session.Session, client *rancher.Client, configMapsDir string) error {
|
|
files, err := os.ReadDir(configMapsDir)
|
|
require.NoError(t, err)
|
|
|
|
for _, file := range files {
|
|
if !file.IsDir() {
|
|
f, err := os.ReadFile(configMapsDir + "/" + file.Name())
|
|
require.NoError(t, err)
|
|
dashboardYAML, err := yaml.YAMLToJSON(f)
|
|
require.NoError(t, err)
|
|
_, err = kubectl.CreateUnstructured(ts, client, dashboardYAML, "local", "cattle-dashboards", ConfigMapGVR())
|
|
if k8sErrors.ReasonForError(err) == metav1.StatusReasonAlreadyExists {
|
|
logrus.Infof("configmap already exists for %v, skipping", file)
|
|
continue
|
|
}
|
|
require.NoError(t, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func WriteMonitoringSnapshotsToPNGs(t *testing.T, ts *session.Session, client *rancher.Client, gapi *gapi.Client, from time.Time, to time.Time, outputPath, clusterID, configMapsDir, prefix, suffix string, dashboardUIDs []string) ([]string, error) {
|
|
var snapshotURLs []string
|
|
var err error
|
|
for _, d := range dashboardUIDs {
|
|
// Ensure ConfigMaps for each Dashboard exist
|
|
configMapPath := configMapsDir + "/" + d
|
|
// In case not all dashboards have custom configmaps in local dir, get from cluster
|
|
if _, err := os.Stat(configMapPath); errors.Is(err, os.ErrExist) {
|
|
_, err := kubectl.GetUnstructured(ts, client, d, clusterID, "cattle-dashboards", ConfigMapGVR())
|
|
require.NoError(t, err)
|
|
}
|
|
snapshotResponse, err := grafanautils.GetDashboardSnapshot(gapi, from, to, d, 9000, false)
|
|
if err != nil {
|
|
log.Warnf("Failed to retrieve dashboard snapshot for %v using time range from %v to %v. Skipping dashboard.", d, from, to)
|
|
continue
|
|
}
|
|
var cookies []string
|
|
imageutils.HTTPCookiesToSlice(gapi.Cookies(), &cookies)
|
|
snapshotURL := "https://" + client.RancherConfig.Host + ranchermonitoring.GrafanaSnapshotRoute + snapshotResponse.Key
|
|
snapshotURLs = append(snapshotURLs, snapshotURL)
|
|
filePath := fmt.Sprintf("%s/%s%s%s.png", outputPath, prefix, d, suffix)
|
|
err = imageutils.URLScreenshotToPNG(snapshotURL, filePath, ranchermonitoring.PanelContentSelector, nil, 60, cookies...)
|
|
if err != nil {
|
|
log.Warnf("Failed to write snapshotURL (%s) to file (%s): %v", snapshotURL, filePath, err)
|
|
}
|
|
}
|
|
return snapshotURLs, err
|
|
}
|
|
|
|
func LogV1ClusterProvisioningTime(t *testing.T, ts *session.Session, client *rancher.Client, cluster *mgmtV3.Cluster, numClusters *int, outputPath, clusterID, namespace, configMapsDir, suffix string) (time.Duration, error) {
|
|
createdTime, err := time.Parse(time.RFC3339, cluster.Created)
|
|
if err != nil {
|
|
return time.Duration(0), err
|
|
}
|
|
var readyTime time.Time
|
|
v1Cluster, _, err := clusters.GetProvisioningClusterByName(client, cluster.ID, "fleet-default")
|
|
if err != nil {
|
|
return time.Duration(0), err
|
|
}
|
|
for _, condition := range v1Cluster.Status.Conditions {
|
|
if condition.Type == "Ready" {
|
|
readyTime, err = time.Parse(time.RFC3339, condition.LastUpdateTime)
|
|
log.Infof("Cluster Created time is: %s", createdTime.Format(time.RFC3339))
|
|
log.Infof("Cluster Ready time is: %s", condition.LastUpdateTime)
|
|
break
|
|
}
|
|
}
|
|
if err != nil {
|
|
return time.Duration(0), err
|
|
}
|
|
filePath := outputPath + "/provisioning-times.log"
|
|
provisioningTimeDiff := readyTime.Sub(createdTime)
|
|
if numClusters != nil {
|
|
text := fmt.Sprintf("%d Clusters: %s\n", numClusters, provisioningTimeDiff)
|
|
WriteStringToFile(text, filePath)
|
|
}
|
|
log.Infof("Provisioning took: %s", provisioningTimeDiff)
|
|
return provisioningTimeDiff, nil
|
|
}
|
|
|
|
func WriteSnapshotURLsToFiles(t *testing.T, ts *session.Session, client *rancher.Client, gapi *gapi.Client, from time.Time, to time.Time, outputPath, clusterID, configMapsDir, prefix, suffix string, dashboardUIDs []string) {
|
|
snapshotURLs, err := WriteMonitoringSnapshotsToPNGs(t, ts, client, gapi, from, to, outputPath, clusterID, configMapsDir, prefix, suffix, dashboardUIDs)
|
|
if err != nil {
|
|
log.Infof("error writing snapshots to PNG: %v", err)
|
|
}
|
|
filename := prefix + "snapshots" + suffix + ".txt"
|
|
f, err := os.Create(outputPath + "/" + filename)
|
|
if err != nil {
|
|
log.Infof("error creating file with path (%s): %v", outputPath+"/"+filename, err)
|
|
}
|
|
for _, url := range snapshotURLs {
|
|
_, err = f.WriteString(url + "\n")
|
|
if err != nil {
|
|
log.Infof("error writing bytes to file (%s): %v", outputPath+"/"+filename, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func WriteStringToFile(s string, dest string) {
|
|
f, err := os.OpenFile(dest,
|
|
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
|
if err != nil {
|
|
log.Warnf("error creating file (%s): %v", dest, err)
|
|
} else {
|
|
log.Info("Writing to file at: ", dest)
|
|
_, err = f.WriteString(s)
|
|
if err != nil {
|
|
log.Warnf("error writing to file (%s): %v", dest, err)
|
|
}
|
|
err := f.Close()
|
|
if err != nil {
|
|
log.Warnf("error closing file (%s): %v", f.Name(), err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func CollectProfileScreenshots(profilePath, imagePath string) error {
|
|
var err error
|
|
profCmd := rancherprofiling.StartServeProfile(profilePath)
|
|
defer func() {
|
|
if err := profCmd.Process.Kill(); err != nil {
|
|
log.Errorf("CollectProfileScreenshots: Error killing command (%s): %v", profCmd.Args, err)
|
|
} else {
|
|
log.Infof("CollectProfileScreenshots: Successfully killed command (%s)", profCmd.Args)
|
|
}
|
|
}()
|
|
|
|
endpoints := PprofEndpoints()
|
|
for k, endpoint := range endpoints {
|
|
endpoint.ImageFilePath = fmt.Sprintf("%s-%s", imagePath, k)
|
|
endpoint.WindowSize = [2]int{2560, 1440}
|
|
endpoint.Timeout = 30
|
|
|
|
_, err = imageutils.GetURLWithRetry(endpoint.URL, 5)
|
|
if err != nil {
|
|
log.Errorf("CollectProfileScreenshots: Failed to get URL (%s), skipping URL: %v", endpoint.URL, err)
|
|
continue
|
|
}
|
|
|
|
if k == "flame" {
|
|
err = imageutils.LocalHTMLFromURL(endpoint.URL, endpoint.ImageFilePath+".html")
|
|
if err != nil {
|
|
log.Errorf("CollectProfileScreenshots: Failed to get flamegraph html: %v", err)
|
|
}
|
|
} else {
|
|
err = imageutils.URLScreenshotToPNG(endpoint.URL, endpoint.ImageFilePath+".png", endpoint.Selector, &endpoint.WindowSize, 25, "")
|
|
if err != nil {
|
|
log.Errorf("CollectProfileScreenshots: Failed to get image of url (%s): %v", endpoint.URL, err)
|
|
}
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
|
|
func CollectRancherMetricsAndArtifacts(t *testing.T, ts *session.Session, client *rancher.Client, gapi *gapi.Client,
|
|
restClient *restclient.Config, clientConfig *clientcmd.ClientConfig, outputPath, prefix, clusterID, configMapsDir string,
|
|
numClusters int, start metav1.Time, end metav1.Time, dashboardUIDs []string) {
|
|
log.Infof("Collecting metrics and other artifacts at %d Clusters", numClusters)
|
|
podNames, err := kubeconfig.GetPodNames(client, clusterID, "cattle-system", &metav1.ListOptions{
|
|
LabelSelector: "app=rancher",
|
|
FieldSelector: "status.phase=Running",
|
|
})
|
|
require.NoError(t, err)
|
|
log.Info("Pod Names: ", podNames)
|
|
clustersSuffix := fmt.Sprintf("-%d-clusters", numClusters)
|
|
|
|
// Create a context with timeout
|
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
|
|
defer cancel()
|
|
// Create a channel to communicate completion of goroutines
|
|
done := make(chan string, len(podNames))
|
|
pprofBaseDir := outputPath + "/pprof/"
|
|
|
|
for _, podName := range podNames {
|
|
err = os.MkdirAll(pprofBaseDir, 0755)
|
|
if err != nil {
|
|
log.Errorf("Failed to create pprof file directory: %v", err)
|
|
}
|
|
|
|
memProfilePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.MemProfileFileName, podName, clustersSuffix+rancherprofiling.ProfileFileExtension)
|
|
log.Infof("Getting mem profile for pod %s, storing at %s", podName, memProfilePath)
|
|
err = rancherprofiling.GetRancherMemProfile(*restClient, *clientConfig, podName, memProfilePath)
|
|
if err != nil {
|
|
log.Errorf("Failed to get Rancher memory profile: %v", err)
|
|
continue
|
|
}
|
|
memProfileImagePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.MemProfileFileName, podName, clustersSuffix)
|
|
log.Infof("Getting mem profile screenshots for %s, storing at %s", podName, pprofBaseDir)
|
|
CollectProfileScreenshots(memProfilePath, memProfileImagePath)
|
|
|
|
cpuProfilePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.CPUProfileFileName, podName, clustersSuffix+rancherprofiling.ProfileFileExtension)
|
|
log.Infof("Getting cpu profile for pod %s, storing at %s", podName, cpuProfilePath)
|
|
err = rancherprofiling.GetRancherCPUProfile(*restClient, *clientConfig, podName, cpuProfilePath)
|
|
if err != nil {
|
|
log.Errorf("Failed to get Rancher CPU profile: %v", err)
|
|
continue
|
|
}
|
|
cpuProfileImagePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.CPUProfileFileName, podName, clustersSuffix)
|
|
log.Infof("Getting cpu profile screenshots for %s, storing at %s", podName, pprofBaseDir)
|
|
CollectProfileScreenshots(cpuProfilePath, cpuProfileImagePath)
|
|
|
|
// Get rancher pod logs
|
|
logFileDest := fmt.Sprintf("%s/%s%s%s-%s", outputPath, prefix, podName, clustersSuffix, start.String()+".log")
|
|
podLogOptions := &corev1.PodLogOptions{
|
|
Container: "rancher",
|
|
SinceTime: &start,
|
|
}
|
|
go func(ctx context.Context, podName, logFileDest string, podLogOptions *corev1.PodLogOptions) {
|
|
defer func() { done <- podName }() // Signal completion of goroutine
|
|
log.Info("Getting pod logs")
|
|
_, err := kubeconfig.GetPodLogsWithContext(ctx, client, "local", podName, "cattle-system", "", logFileDest, true, podLogOptions)
|
|
if err != nil {
|
|
log.Warnf("error getting pod logs for pod (%s): %v", podName, err)
|
|
}
|
|
}(ctx, podName, logFileDest, podLogOptions)
|
|
}
|
|
|
|
// Wait for all goroutines to complete or timeout
|
|
for range podNames {
|
|
select {
|
|
case podName := <-done:
|
|
log.Infof("Completed getting pod logs for %s", podName)
|
|
case <-ctx.Done():
|
|
log.Warn("Timeout waiting for podlog goroutines to complete")
|
|
return
|
|
}
|
|
}
|
|
_, newGAPI, err := ranchermonitoring.SetupClients(client.RancherConfig.Host, client.RancherConfig.AdminToken, client.RancherConfig.AdminPassword)
|
|
if err != nil {
|
|
log.Errorf("Could not re-setup prometheus and/or grafana clients: %v", err)
|
|
}
|
|
WriteMonitoringSnapshotsToPNGs(t, ts, client, newGAPI, start.Time, end.Time, outputPath, clusterID, configMapsDir, prefix, clustersSuffix, dashboardUIDs)
|
|
}
|