mirror of https://github.com/kubernetes/kops.git
Merge pull request #16266 from borg-land/dump-patch
Fix dumping logs for GCE scale tests
This commit is contained in:
commit
e5c4fe80df
|
@ -210,7 +210,14 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
|
|||
return fmt.Errorf("adding key to SSH agent: %w", err)
|
||||
}
|
||||
|
||||
dumper := dump.NewLogDumper(cluster.ObjectMeta.Name, sshConfig, keyRing, options.Dir)
|
||||
// look for a bastion instance and use it if one exists
|
||||
bastionAddress := ""
|
||||
for _, instance := range d.Instances {
|
||||
if strings.Contains(instance.Name, "bastion") {
|
||||
bastionAddress = instance.PublicAddresses[0]
|
||||
}
|
||||
}
|
||||
dumper := dump.NewLogDumper(bastionAddress, sshConfig, keyRing, options.Dir)
|
||||
|
||||
var additionalIPs []string
|
||||
var additionalPrivateIPs []string
|
||||
|
@ -224,7 +231,7 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
|
|||
}
|
||||
}
|
||||
|
||||
if err := dumper.DumpAllNodes(ctx, nodes, additionalIPs, additionalPrivateIPs); err != nil {
|
||||
if err := dumper.DumpAllNodes(ctx, nodes, options.MaxNodes, additionalIPs, additionalPrivateIPs); err != nil {
|
||||
return fmt.Errorf("error dumping nodes: %v", err)
|
||||
}
|
||||
|
||||
|
|
|
@ -34,11 +34,6 @@ import (
|
|||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
// MaxNodesToDump is the maximum number of nodes to dump
|
||||
MaxNodesToDump = 500
|
||||
)
|
||||
|
||||
// logDumper gets all the nodes from a kubernetes cluster and dumps a well-known set of logs
|
||||
type logDumper struct {
|
||||
sshClientFactory sshClientFactory
|
||||
|
@ -51,12 +46,15 @@ type logDumper struct {
|
|||
}
|
||||
|
||||
// NewLogDumper is the constructor for a logDumper
|
||||
func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
|
||||
func NewLogDumper(bastionAddress string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper {
|
||||
sshClientFactory := &sshClientFactoryImplementation{
|
||||
bastion: "bastion." + clusterName,
|
||||
keyRing: keyRing,
|
||||
sshConfig: sshConfig,
|
||||
}
|
||||
if bastionAddress != "" {
|
||||
log.Printf("detected a bastion instance, with the address: %s", bastionAddress)
|
||||
sshClientFactory.bastion = bastionAddress
|
||||
}
|
||||
|
||||
d := &logDumper{
|
||||
sshClientFactory: sshClientFactory,
|
||||
|
@ -106,9 +104,10 @@ func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent
|
|||
// if the IPs are not found from kubectl get nodes, then these will be dumped also.
|
||||
// This allows for dumping logs on nodes even if they don't register as a kubernetes
|
||||
// node, or if a node fails to register, or if the whole cluster fails to start.
|
||||
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, additionalIPs, additionalPrivateIPs []string) error {
|
||||
func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, maxNodesToDump int, additionalIPs, additionalPrivateIPs []string) error {
|
||||
var special, regular, dumped []*corev1.Node
|
||||
|
||||
log.Printf("starting to dump %d nodes fetched through the Kubernetes APIs", len(nodes.Items))
|
||||
for i := range nodes.Items {
|
||||
node := &nodes.Items[i]
|
||||
|
||||
|
@ -139,8 +138,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
|
|||
}
|
||||
|
||||
for i := range regular {
|
||||
if len(dumped) >= MaxNodesToDump {
|
||||
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
|
||||
if len(dumped) >= maxNodesToDump {
|
||||
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
|
||||
return nil
|
||||
}
|
||||
node := regular[i]
|
||||
|
@ -154,8 +153,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
|
|||
|
||||
notDumped := findInstancesNotDumped(additionalIPs, dumped)
|
||||
for _, ip := range notDumped {
|
||||
if len(dumped) >= MaxNodesToDump {
|
||||
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
|
||||
if len(dumped) >= maxNodesToDump {
|
||||
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
|
||||
return nil
|
||||
}
|
||||
err := d.dumpNotRegistered(ctx, ip, false)
|
||||
|
@ -166,8 +165,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add
|
|||
|
||||
notDumped = findInstancesNotDumped(additionalPrivateIPs, dumped)
|
||||
for _, ip := range notDumped {
|
||||
if len(dumped) >= MaxNodesToDump {
|
||||
log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump)
|
||||
if len(dumped) >= maxNodesToDump {
|
||||
log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump)
|
||||
return nil
|
||||
}
|
||||
err := d.dumpNotRegistered(ctx, ip, true)
|
||||
|
|
|
@ -63,6 +63,12 @@ func DumpManagedInstance(op *resources.DumpOperation, r *resources.Resource) err
|
|||
klog.Warningf("instance %q not found", instance.Instance)
|
||||
} else {
|
||||
for _, ni := range instanceDetails.NetworkInterfaces {
|
||||
if ni.NetworkIP != "" {
|
||||
i.PrivateAddresses = append(i.PrivateAddresses, ni.NetworkIP)
|
||||
}
|
||||
if ni.Ipv6Address != "" {
|
||||
i.PrivateAddresses = append(i.PrivateAddresses, ni.Ipv6Address)
|
||||
}
|
||||
for _, ac := range ni.AccessConfigs {
|
||||
if ac.NatIP != "" {
|
||||
i.PublicAddresses = append(i.PublicAddresses, ac.NatIP)
|
||||
|
|
|
@ -100,6 +100,8 @@ func (d *deployer) initialize() error {
|
|||
d.SSHPublicKeyPath = publicKey
|
||||
}
|
||||
d.createBucket = true
|
||||
} else if d.SSHPrivateKeyPath == "" && os.Getenv("KUBE_SSH_KEY_PATH") != "" {
|
||||
d.SSHPrivateKeyPath = os.Getenv("KUBE_SSH_KEY_PATH")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -68,6 +68,7 @@ type deployer struct {
|
|||
ValidationWait time.Duration `flag:"validation-wait" desc:"time to wait for newly created cluster to pass validation"`
|
||||
ValidationCount int `flag:"validation-count" desc:"how many times should a validation pass"`
|
||||
ValidationInterval time.Duration `flag:"validation-interval" desc:"time in duration to wait between validation attempts"`
|
||||
MaxNodesToDump string `flag:"max-nodes-to-dump" desc:"max number of nodes to dump logs from, helpful to set when running scale tests"`
|
||||
|
||||
TemplatePath string `flag:"template-path" desc:"The path to the manifest template used for cluster creation"`
|
||||
|
||||
|
|
|
@ -44,6 +44,10 @@ func (d *deployer) DumpClusterLogs() error {
|
|||
"--private-key", d.SSHPrivateKeyPath,
|
||||
"--ssh-user", d.SSHUser,
|
||||
}
|
||||
|
||||
if d.MaxNodesToDump != "" {
|
||||
args = append(args, "--max-nodes", d.MaxNodesToDump)
|
||||
}
|
||||
klog.Info(strings.Join(args, " "))
|
||||
cmd := exec.Command(args[0], args[1:]...)
|
||||
cmd.SetEnv(append(d.env(), "KOPS_TOOLBOX_DUMP_K8S_RESOURCES=1")...)
|
||||
|
|
Loading…
Reference in New Issue