Merge pull request #16266 from borg-land/dump-patch

Fix dumping logs for GCE scale tests
This commit is contained in:
Kubernetes Prow Robot 2024-01-26 20:49:50 +01:00 committed by GitHub
commit e5c4fe80df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 35 additions and 16 deletions

View File

@ -210,7 +210,14 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
return fmt.Errorf("adding key to SSH agent: %w", err)
}
dumper := dump.NewLogDumper(cluster.ObjectMeta.Name, sshConfig, keyRing, options.Dir)
// look for a bastion instance and use it if one exists
bastionAddress := ""
for _, instance := range d.Instances {
if strings.Contains(instance.Name, "bastion") {
bastionAddress = instance.PublicAddresses[0]
}
}
dumper := dump.NewLogDumper(bastionAddress, sshConfig, keyRing, options.Dir)
var additionalIPs []string
var additionalPrivateIPs []string
@ -224,7 +231,7 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
}
}
if err := dumper.DumpAllNodes(ctx, nodes, additionalIPs, additionalPrivateIPs); err != nil {
if err := dumper.DumpAllNodes(ctx, nodes, options.MaxNodes, additionalIPs, additionalPrivateIPs); err != nil {
return fmt.Errorf("error dumping nodes: %v", err)
}

View File

@ -34,11 +34,6 @@ import (
"k8s.io/klog/v2"
)
const (
// MaxNodesToDump is the maximum number of nodes to dump
MaxNodesToDump = 500
)
// logDumper gets all the nodes from a kubernetes cluster and dumps a well-known set of logs
type logDumper struct {
sshClientFactory sshClientFactory
@ -51,12 +46,15 @@ type logDumper struct {
}
// NewLogDumper is the constructor for a logDumper
func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
func NewLogDumper(bastionAddress string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
sshClientFactory := &sshClientFactoryImplementation{
bastion: "bastion." + clusterName,
keyRing: keyRing,
sshConfig: sshConfig,
}
if bastionAddress != "" {
log.Printf("detected a bastion instance, with the address: %s", bastionAddress)
sshClientFactory.bastion = bastionAddress
}
d := &logDumper{
sshClientFactory: sshClientFactory,
@ -106,9 +104,10 @@ func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent
// if the IPs are not found from `kubectl get nodes`, then these will also be dumped.
// This allows for dumping logs from nodes even if they don't register as a Kubernetes
// node, or if a node fails to register, or if the whole cluster fails to start.
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, additionalIPs, additionalPrivateIPs []string) error {
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, maxNodesToDump int, additionalIPs, additionalPrivateIPs []string) error {
var special, regular, dumped []*corev1.Node
log.Printf("starting to dump %d nodes fetched through the Kubernetes APIs", len(nodes.Items))
for i := range nodes.Items {
node := &nodes.Items[i]
@ -139,8 +138,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
}
for i := range regular {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
node := regular[i]
@ -154,8 +153,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
notDumped := findInstancesNotDumped(additionalIPs, dumped)
for _, ip := range notDumped {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
err := d.dumpNotRegistered(ctx, ip, false)
@ -166,8 +165,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
notDumped = findInstancesNotDumped(additionalPrivateIPs, dumped)
for _, ip := range notDumped {
if len(dumped) >= MaxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
if len(dumped) >= maxNodesToDump {
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
return nil
}
err := d.dumpNotRegistered(ctx, ip, true)

View File

@ -63,6 +63,12 @@ func DumpManagedInstance(op *resources.DumpOperation, r *resources.Resource) err
klog.Warningf("instance %q not found", instance.Instance)
} else {
for _, ni := range instanceDetails.NetworkInterfaces {
if ni.NetworkIP != "" {
i.PrivateAddresses = append(i.PrivateAddresses, ni.NetworkIP)
}
if ni.Ipv6Address != "" {
i.PrivateAddresses = append(i.PrivateAddresses, ni.Ipv6Address)
}
for _, ac := range ni.AccessConfigs {
if ac.NatIP != "" {
i.PublicAddresses = append(i.PublicAddresses, ac.NatIP)

View File

@ -100,6 +100,8 @@ func (d *deployer) initialize() error {
d.SSHPublicKeyPath = publicKey
}
d.createBucket = true
} else if d.SSHPrivateKeyPath == "" && os.Getenv("KUBE_SSH_KEY_PATH") != "" {
d.SSHPrivateKeyPath = os.Getenv("KUBE_SSH_KEY_PATH")
}
}

View File

@ -68,6 +68,7 @@ type deployer struct {
ValidationWait time.Duration `flag:"validation-wait" desc:"time to wait for newly created cluster to pass validation"`
ValidationCount int `flag:"validation-count" desc:"how many times should a validation pass"`
ValidationInterval time.Duration `flag:"validation-interval" desc:"time in duration to wait between validation attempts"`
MaxNodesToDump string `flag:"max-nodes-to-dump" desc:"max number of nodes to dump logs from, helpful to set when running scale tests"`
TemplatePath string `flag:"template-path" desc:"The path to the manifest template used for cluster creation"`

View File

@ -44,6 +44,10 @@ func (d *deployer) DumpClusterLogs() error {
"--private-key", d.SSHPrivateKeyPath,
"--ssh-user", d.SSHUser,
}
if d.MaxNodesToDump != "" {
args = append(args, "--max-nodes", d.MaxNodesToDump)
}
klog.Info(strings.Join(args, " "))
cmd := exec.Command(args[0], args[1:]...)
cmd.SetEnv(append(d.env(), "KOPS_TOOLBOX_DUMP_K8S_RESOURCES=1")...)