dartboard/test/utils/helpers/helpers.go

324 lines
13 KiB
Go

package helpers
import (
"context"
"fmt"
"os"
"testing"
"time"
"github.com/pkg/errors"
"github.com/git-ival/dartboard/test/utils/grafanautils"
"github.com/git-ival/dartboard/test/utils/imageutils"
"github.com/git-ival/dartboard/test/utils/ranchermonitoring"
"github.com/git-ival/dartboard/test/utils/rancherprofiling"
gapi "github.com/grafana/grafana-api-golang-client"
provV1 "github.com/rancher/rancher/pkg/apis/provisioning.cattle.io/v1"
"github.com/rancher/shepherd/clients/rancher"
mgmtV3 "github.com/rancher/shepherd/clients/rancher/generated/management/v3"
"github.com/rancher/shepherd/extensions/clusters"
"github.com/rancher/shepherd/extensions/kubeconfig"
"github.com/rancher/shepherd/extensions/kubectl"
"github.com/rancher/shepherd/pkg/session"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"sigs.k8s.io/yaml"
)
// ConfigMapGVR returns the GroupVersionResource for the "configmaps" resource
// in the core/v1 scheme group and version.
func ConfigMapGVR() schema.GroupVersionResource {
	gv := corev1.SchemeGroupVersion
	return schema.GroupVersionResource{
		Group:    gv.Group,
		Version:  gv.Version,
		Resource: "configmaps",
	}
}
// V1ClusterGVR returns the GroupVersionResource for the "clusters" resource
// in the provisioning.cattle.io/v1 scheme group and version.
func V1ClusterGVR() schema.GroupVersionResource {
	gv := provV1.SchemeGroupVersion
	return schema.GroupVersionResource{
		Group:    gv.Group,
		Version:  gv.Version,
		Resource: "clusters",
	}
}
// ScreenshotParams bundles the settings describing one web page capture.
// PprofEndpoints fills in URL; CollectProfileScreenshots fills in
// ImageFilePath, WindowSize and Timeout before taking the capture.
type ScreenshotParams struct {
// URL is the address of the page to capture.
URL string
// ImageFilePath is the output path without extension; CollectProfileScreenshots
// appends ".png" or ".html" when writing the file.
ImageFilePath string
// WindowSize is handed to imageutils.URLScreenshotToPNG by pointer.
// Presumably {width, height} in pixels — TODO confirm against imageutils.
WindowSize [2]int
// Selector is the element selector passed to imageutils.URLScreenshotToPNG;
// PprofEndpoints leaves it empty.
Selector string
// Timeout is set to 30 by CollectProfileScreenshots but is not read anywhere
// in this file. Presumably seconds — TODO confirm.
Timeout int
// Cookies is not read by any code visible in this file.
// Presumably cookies to attach to the page request — TODO confirm.
Cookies []string
}
// PprofEndpoints returns the pprof web UI pages to capture, keyed by view name
// ("graph", "top", "flame", "peek", "source"). Only the URL of each entry is
// populated.
//
// Selectors are deliberately left empty because chromedp sometimes fails to
// find them. The ones that were tried are: "div#graph" (graph),
// "table#toptable" (top), "div#stack-chart" (flame) and "div#content"
// (peek, source).
//
// The "flame" view cannot be retrieved programmatically as a browser
// screenshot, so callers have to fall back to saving its HTML.
func PprofEndpoints() map[string]ScreenshotParams {
	uiRoot := "http://" + rancherprofiling.BasePprofAddress + "/ui/"

	// view name -> path below the UI root
	pages := [...]struct{ view, path string }{
		{"graph", ""},
		{"top", "top"},
		{"flame", "flamegraph"},
		{"peek", "peek"},
		{"source", "source"},
	}

	endpoints := make(map[string]ScreenshotParams, len(pages))
	for _, p := range pages {
		endpoints[p.view] = ScreenshotParams{URL: uiRoot + p.path}
	}
	return endpoints
}
// GetAllRancherLogs fetches the timestamped logs of the "rancher" container of
// pod podName in the "cattle-system" namespace of cluster clusterID, starting
// at since. It returns whatever kubeconfig.GetPodLogsWithOpts returns.
func GetAllRancherLogs(r *rancher.Client, clusterID string, podName string, since metav1.Time) (string, error) {
	const (
		rancherNamespace = "cattle-system"
		rancherContainer = "rancher"
	)

	log.Infof("Collecting Rancher logs since: %s", since.String())

	opts := corev1.PodLogOptions{
		Container:  rancherContainer,
		Timestamps: true,
		SinceTime:  &since,
	}
	return kubeconfig.GetPodLogsWithOpts(r, clusterID, podName, rancherNamespace, "", &opts)
}
// CreateCustomMonitoringDashboards creates one ConfigMap in the
// "cattle-dashboards" namespace of the "local" cluster for every regular file
// found directly inside configMapsDir. Each file is read as YAML and converted
// to JSON before being submitted.
//
// A ConfigMap that already exists is logged and skipped. Any other failure
// (listing the directory, reading or converting a file, creating the object)
// aborts the test through require.NoError, so the returned error is always nil.
func CreateCustomMonitoringDashboards(t *testing.T, ts *session.Session, client *rancher.Client, configMapsDir string) error {
	entries, readDirErr := os.ReadDir(configMapsDir)
	require.NoError(t, readDirErr)

	for _, file := range entries {
		// Sub-directories are not descended into.
		if file.IsDir() {
			continue
		}

		manifestPath := configMapsDir + "/" + file.Name()
		rawYAML, err := os.ReadFile(manifestPath)
		require.NoError(t, err)

		dashboardJSON, err := yaml.YAMLToJSON(rawYAML)
		require.NoError(t, err)

		_, err = kubectl.CreateUnstructured(ts, client, dashboardJSON, "local", "cattle-dashboards", ConfigMapGVR())
		alreadyExists := k8sErrors.ReasonForError(err) == metav1.StatusReasonAlreadyExists
		if alreadyExists {
			logrus.Infof("configmap already exists for %v, skipping", file)
			continue
		}
		require.NoError(t, err)
	}
	return nil
}
// WriteMonitoringSnapshotsToPNGs creates a Grafana snapshot for every dashboard
// UID in dashboardUIDs over the [from, to] time range and saves a screenshot of
// each snapshot to "<outputPath>/<prefix><uid><suffix>.png".
//
// For each dashboard whose ConfigMap file is missing from configMapsDir, the
// ConfigMap named after the UID is looked up in the "cattle-dashboards"
// namespace of clusterID; a failed lookup aborts the test via require.NoError.
//
// Snapshot and screenshot failures are best-effort: they are logged and the
// loop moves on to the next dashboard.
//
// Returns the URLs of all snapshots that were created (including those whose
// screenshot failed) and the first snapshot/screenshot error encountered, or
// nil if every dashboard succeeded.
func WriteMonitoringSnapshotsToPNGs(t *testing.T, ts *session.Session, client *rancher.Client, gapi *gapi.Client, from time.Time, to time.Time, outputPath, clusterID, configMapsDir, prefix, suffix string, dashboardUIDs []string) ([]string, error) {
	var (
		snapshotURLs []string
		firstErr     error
	)

	for _, d := range dashboardUIDs {
		// In case not all dashboards have custom configmaps in the local dir,
		// make sure the ConfigMap exists in the cluster instead.
		// os.Stat reports a missing file as ErrNotExist (never ErrExist).
		configMapPath := configMapsDir + "/" + d
		if _, statErr := os.Stat(configMapPath); errors.Is(statErr, os.ErrNotExist) {
			_, getErr := kubectl.GetUnstructured(ts, client, d, clusterID, "cattle-dashboards", ConfigMapGVR())
			require.NoError(t, getErr)
		}

		snapshotResponse, err := grafanautils.GetDashboardSnapshot(gapi, from, to, d, 9000, false)
		if err != nil {
			log.Warnf("Failed to retrieve dashboard snapshot for %v using time range from %v to %v. Skipping dashboard.", d, from, to)
			if firstErr == nil {
				firstErr = fmt.Errorf("retrieving dashboard snapshot for %s: %w", d, err)
			}
			continue
		}

		var cookies []string
		imageutils.HTTPCookiesToSlice(gapi.Cookies(), &cookies)

		snapshotURL := "https://" + client.RancherConfig.Host + ranchermonitoring.GrafanaSnapshotRoute + snapshotResponse.Key
		snapshotURLs = append(snapshotURLs, snapshotURL)

		filePath := fmt.Sprintf("%s/%s%s%s.png", outputPath, prefix, d, suffix)
		if err := imageutils.URLScreenshotToPNG(snapshotURL, filePath, ranchermonitoring.PanelContentSelector, nil, 60, cookies...); err != nil {
			log.Warnf("Failed to write snapshotURL (%s) to file (%s): %v", snapshotURL, filePath, err)
			if firstErr == nil {
				firstErr = fmt.Errorf("writing snapshot %s to %s: %w", snapshotURL, filePath, err)
			}
		}
	}
	return snapshotURLs, firstErr
}
// LogV1ClusterProvisioningTime computes how long a cluster took to provision:
// the time between cluster.Created and the LastUpdateTime of the "Ready"
// condition on the matching provisioning (v1) cluster, which is looked up by
// cluster.ID in the "fleet-default" namespace.
//
// When numClusters is non-nil, a line "<numClusters> Clusters: <duration>" is
// appended to "<outputPath>/provisioning-times.log".
//
// The t, ts, clusterID, namespace, configMapsDir and suffix parameters are not
// used by this function.
//
// Returns an error if either timestamp cannot be parsed as RFC 3339, if the
// provisioning cluster cannot be fetched, or if it has no "Ready" condition.
func LogV1ClusterProvisioningTime(t *testing.T, ts *session.Session, client *rancher.Client, cluster *mgmtV3.Cluster, numClusters *int, outputPath, clusterID, namespace, configMapsDir, suffix string) (time.Duration, error) {
	createdTime, err := time.Parse(time.RFC3339, cluster.Created)
	if err != nil {
		return 0, err
	}

	v1Cluster, _, err := clusters.GetProvisioningClusterByName(client, cluster.ID, "fleet-default")
	if err != nil {
		return 0, err
	}

	var (
		readyTime  time.Time
		readyFound bool
	)
	for _, condition := range v1Cluster.Status.Conditions {
		if condition.Type != "Ready" {
			continue
		}
		readyTime, err = time.Parse(time.RFC3339, condition.LastUpdateTime)
		if err != nil {
			return 0, fmt.Errorf("parsing Ready condition time %q: %w", condition.LastUpdateTime, err)
		}
		readyFound = true
		log.Infof("Cluster Created time is: %s", createdTime.Format(time.RFC3339))
		log.Infof("Cluster Ready time is: %s", condition.LastUpdateTime)
		break
	}
	// Without a Ready condition readyTime would stay at the zero time and the
	// subtraction below would yield a meaningless, hugely negative duration.
	if !readyFound {
		return 0, fmt.Errorf("provisioning cluster %s has no Ready condition", cluster.ID)
	}

	provisioningTimeDiff := readyTime.Sub(createdTime)
	if numClusters != nil {
		filePath := outputPath + "/provisioning-times.log"
		// Dereference the pointer: %d on the pointer itself prints its address.
		text := fmt.Sprintf("%d Clusters: %s\n", *numClusters, provisioningTimeDiff)
		WriteStringToFile(text, filePath)
	}
	log.Infof("Provisioning took: %s", provisioningTimeDiff)
	return provisioningTimeDiff, nil
}
// WriteSnapshotURLsToFiles takes PNG screenshots of the given dashboards via
// WriteMonitoringSnapshotsToPNGs and then writes the resulting snapshot URLs,
// one per line, to "<outputPath>/<prefix>snapshots<suffix>.txt".
//
// All failures are logged rather than returned. If the URL file cannot be
// created, nothing is written and the function returns.
func WriteSnapshotURLsToFiles(t *testing.T, ts *session.Session, client *rancher.Client, gapi *gapi.Client, from time.Time, to time.Time, outputPath, clusterID, configMapsDir, prefix, suffix string, dashboardUIDs []string) {
	snapshotURLs, err := WriteMonitoringSnapshotsToPNGs(t, ts, client, gapi, from, to, outputPath, clusterID, configMapsDir, prefix, suffix, dashboardUIDs)
	if err != nil {
		log.Infof("error writing snapshots to PNG: %v", err)
	}

	filePath := outputPath + "/" + prefix + "snapshots" + suffix + ".txt"
	f, err := os.Create(filePath)
	if err != nil {
		log.Infof("error creating file with path (%s): %v", filePath, err)
		// There is no file to write to; stop instead of failing once per URL.
		return
	}
	defer func() {
		if closeErr := f.Close(); closeErr != nil {
			log.Infof("error closing file (%s): %v", filePath, closeErr)
		}
	}()

	for _, snapshotURL := range snapshotURLs {
		if _, err := f.WriteString(snapshotURL + "\n"); err != nil {
			log.Infof("error writing bytes to file (%s): %v", filePath, err)
		}
	}
}
// WriteStringToFile appends s to the file at dest, creating the file with mode
// 0644 if it does not exist. Open, write and close failures are logged as
// warnings; nothing is returned to the caller.
func WriteStringToFile(s string, dest string) {
	flags := os.O_APPEND | os.O_CREATE | os.O_WRONLY
	out, openErr := os.OpenFile(dest, flags, 0644)
	if openErr != nil {
		log.Warnf("error creating file (%s): %v", dest, openErr)
		return
	}

	log.Info("Writing to file at: ", dest)
	if _, writeErr := out.WriteString(s); writeErr != nil {
		log.Warnf("error writing to file (%s): %v", dest, writeErr)
	}

	// The file is closed even when the write failed.
	if closeErr := out.Close(); closeErr != nil {
		log.Warnf("error closing file (%s): %v", out.Name(), closeErr)
	}
}
// CollectProfileScreenshots serves the profile at profilePath through
// rancherprofiling.StartServeProfile and captures every page listed by
// PprofEndpoints. Each capture is written to "<imagePath>-<view>.png", except
// the "flame" view, which is saved as "<imagePath>-flame.html".
//
// The serving command is killed when the function returns. Individual endpoint
// failures are logged and do not stop the remaining captures.
//
// Returns nil when every endpoint was captured, otherwise the first failure
// encountered. Endpoints are visited in map order, so which failure is
// reported is unspecified when several endpoints fail.
func CollectProfileScreenshots(profilePath, imagePath string) error {
	profCmd := rancherprofiling.StartServeProfile(profilePath)
	defer func() {
		if err := profCmd.Process.Kill(); err != nil {
			log.Errorf("CollectProfileScreenshots: Error killing command (%s): %v", profCmd.Args, err)
		} else {
			log.Infof("CollectProfileScreenshots: Successfully killed command (%s)", profCmd.Args)
		}
	}()

	// Keep the first failure so a later successful endpoint cannot mask it.
	var firstErr error
	record := func(err error) {
		if firstErr == nil {
			firstErr = err
		}
	}

	for k, endpoint := range PprofEndpoints() {
		endpoint.ImageFilePath = fmt.Sprintf("%s-%s", imagePath, k)
		endpoint.WindowSize = [2]int{2560, 1440}
		// NOTE(review): Timeout is set here but the screenshot call below
		// passes a literal 25 — confirm which value is intended.
		endpoint.Timeout = 30

		// Make sure the page is reachable before trying to capture it.
		if _, err := imageutils.GetURLWithRetry(endpoint.URL, 5); err != nil {
			log.Errorf("CollectProfileScreenshots: Failed to get URL (%s), skipping URL: %v", endpoint.URL, err)
			record(err)
			continue
		}

		if k == "flame" {
			// The flamegraph is saved as HTML rather than as a screenshot.
			if err := imageutils.LocalHTMLFromURL(endpoint.URL, endpoint.ImageFilePath+".html"); err != nil {
				log.Errorf("CollectProfileScreenshots: Failed to get flamegraph html: %v", err)
				record(err)
			}
			continue
		}

		if err := imageutils.URLScreenshotToPNG(endpoint.URL, endpoint.ImageFilePath+".png", endpoint.Selector, &endpoint.WindowSize, 25, ""); err != nil {
			log.Errorf("CollectProfileScreenshots: Failed to get image of url (%s): %v", endpoint.URL, err)
			record(err)
		}
	}
	return firstErr
}
// CollectRancherMetricsAndArtifacts gathers diagnostics for every running
// Rancher pod (label "app=rancher" in "cattle-system" of clusterID) and then
// screenshots the given Grafana dashboards.
//
// Per pod, in order:
//  1. a memory profile plus its pprof screenshots under "<outputPath>/pprof/",
//  2. a CPU profile plus its pprof screenshots,
//  3. the "rancher" container logs since start, fetched in a background
//     goroutine.
//
// If fetching either profile fails, the remaining steps for that pod are
// skipped (including its logs). The function waits for the log goroutines it
// actually started, bounded by a 30 minute timeout; on timeout it returns
// without taking dashboard snapshots.
//
// Finally the Grafana client is re-created via ranchermonitoring.SetupClients
// and the dashboards are captured for the [start, end] range with
// WriteMonitoringSnapshotsToPNGs. The gapi parameter is not used.
//
// Failing to list the pods aborts the test via require.NoError; every other
// failure is logged.
func CollectRancherMetricsAndArtifacts(t *testing.T, ts *session.Session, client *rancher.Client, gapi *gapi.Client,
	restClient *restclient.Config, clientConfig *clientcmd.ClientConfig, outputPath, prefix, clusterID, configMapsDir string,
	numClusters int, start metav1.Time, end metav1.Time, dashboardUIDs []string) {
	log.Infof("Collecting metrics and other artifacts at %d Clusters", numClusters)

	podNames, err := kubeconfig.GetPodNames(client, clusterID, "cattle-system", &metav1.ListOptions{
		LabelSelector: "app=rancher",
		FieldSelector: "status.phase=Running",
	})
	require.NoError(t, err)
	log.Info("Pod Names: ", podNames)

	clustersSuffix := fmt.Sprintf("-%d-clusters", numClusters)

	// Bound the total time spent waiting on log collection.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	// Buffered so that a goroutine can always report completion without
	// blocking, even if this function has already returned on timeout.
	done := make(chan string, len(podNames))

	// The directory is the same for every pod, so create it once.
	pprofBaseDir := outputPath + "/pprof/"
	if err := os.MkdirAll(pprofBaseDir, 0755); err != nil {
		log.Errorf("Failed to create pprof file directory: %v", err)
	}

	// Number of log goroutines actually started. Pods skipped by a profile
	// failure never start one, so waiting for len(podNames) completions would
	// block until the timeout.
	pending := 0

	for _, podName := range podNames {
		memProfilePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.MemProfileFileName, podName, clustersSuffix+rancherprofiling.ProfileFileExtension)
		log.Infof("Getting mem profile for pod %s, storing at %s", podName, memProfilePath)
		err = rancherprofiling.GetRancherMemProfile(*restClient, *clientConfig, podName, memProfilePath)
		if err != nil {
			log.Errorf("Failed to get Rancher memory profile: %v", err)
			continue
		}
		memProfileImagePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.MemProfileFileName, podName, clustersSuffix)
		log.Infof("Getting mem profile screenshots for %s, storing at %s", podName, pprofBaseDir)
		// Best effort: failures are logged inside CollectProfileScreenshots.
		_ = CollectProfileScreenshots(memProfilePath, memProfileImagePath)

		cpuProfilePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.CPUProfileFileName, podName, clustersSuffix+rancherprofiling.ProfileFileExtension)
		log.Infof("Getting cpu profile for pod %s, storing at %s", podName, cpuProfilePath)
		err = rancherprofiling.GetRancherCPUProfile(*restClient, *clientConfig, podName, cpuProfilePath)
		if err != nil {
			log.Errorf("Failed to get Rancher CPU profile: %v", err)
			continue
		}
		cpuProfileImagePath := fmt.Sprintf("%s%s%s-%s%s", pprofBaseDir, prefix, rancherprofiling.CPUProfileFileName, podName, clustersSuffix)
		log.Infof("Getting cpu profile screenshots for %s, storing at %s", podName, pprofBaseDir)
		// Best effort: failures are logged inside CollectProfileScreenshots.
		_ = CollectProfileScreenshots(cpuProfilePath, cpuProfileImagePath)

		// Get rancher pod logs
		logFileDest := fmt.Sprintf("%s/%s%s%s-%s", outputPath, prefix, podName, clustersSuffix, start.String()+".log")
		podLogOptions := &corev1.PodLogOptions{
			Container: "rancher",
			SinceTime: &start,
		}
		pending++
		// NOTE(review): logs are read from the "local" cluster while the pod
		// names were listed from clusterID — confirm these always match.
		go func(ctx context.Context, podName, logFileDest string, podLogOptions *corev1.PodLogOptions) {
			defer func() { done <- podName }() // Signal completion of goroutine
			log.Info("Getting pod logs")
			_, err := kubeconfig.GetPodLogsWithContext(ctx, client, "local", podName, "cattle-system", "", logFileDest, true, podLogOptions)
			if err != nil {
				log.Warnf("error getting pod logs for pod (%s): %v", podName, err)
			}
		}(ctx, podName, logFileDest, podLogOptions)
	}

	// Wait for every started goroutine to complete, or for the timeout.
	for ; pending > 0; pending-- {
		select {
		case podName := <-done:
			log.Infof("Completed getting pod logs for %s", podName)
		case <-ctx.Done():
			log.Warn("Timeout waiting for podlog goroutines to complete")
			return
		}
	}

	_, newGAPI, err := ranchermonitoring.SetupClients(client.RancherConfig.Host, client.RancherConfig.AdminToken, client.RancherConfig.AdminPassword)
	if err != nil {
		log.Errorf("Could not re-setup prometheus and/or grafana clients: %v", err)
	}
	// Without a Grafana client there is nothing to snapshot; continuing would
	// dereference a nil client.
	if newGAPI == nil {
		log.Warn("No grafana client available, skipping monitoring snapshots")
		return
	}
	if _, err := WriteMonitoringSnapshotsToPNGs(t, ts, client, newGAPI, start.Time, end.Time, outputPath, clusterID, configMapsDir, prefix, clustersSuffix, dashboardUIDs); err != nil {
		log.Warnf("error writing monitoring snapshots to PNG: %v", err)
	}
}