Dump pod logs in parallel

This commit is contained in:
Peter Rifel 2024-01-01 13:11:25 -06:00
parent db32f982d1
commit 8ad0661975
No known key found for this signature in database
2 changed files with 170 additions and 0 deletions

View File

@ -235,6 +235,14 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer,
if err := dumper.DumpResources(ctx); err != nil { if err := dumper.DumpResources(ctx); err != nil {
return fmt.Errorf("error dumping resources: %w", err) return fmt.Errorf("error dumping resources: %w", err)
} }
logDumper, err := dump.NewPodLogDumper(config, options.Dir)
if err != nil {
return fmt.Errorf("error creating pod log dumper: %w", err)
}
if err := logDumper.DumpLogs(ctx); err != nil {
return fmt.Errorf("error dumping pod logs: %w", err)
}
} }
} }

162
pkg/dump/podlogs.go Normal file
View File

@ -0,0 +1,162 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dump
import (
"context"
"errors"
"fmt"
"os"
"path"
v1 "k8s.io/api/core/v1"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
)
const (
podLogDumpConcurrency = 20
)
type podLogDumper struct {
k8sClient *kubernetes.Clientset
artifactsDir string
}
type podLogDumpResult struct {
err error
}
func NewPodLogDumper(k8sConfig *rest.Config, artifactsDir string) (*podLogDumper, error) {
k8sConfig.QPS = 50
k8sConfig.Burst = 100
clientSet, err := kubernetes.NewForConfig(k8sConfig)
if err != nil {
return nil, fmt.Errorf("creating clientset: %w", err)
}
return &podLogDumper{
k8sClient: clientSet,
artifactsDir: artifactsDir,
}, nil
}
func (d *podLogDumper) DumpLogs(ctx context.Context) error {
klog.Info("Dumping k8s pod logs")
allPods, err := d.k8sClient.CoreV1().Pods(v1.NamespaceAll).List(ctx, metav1.ListOptions{})
if err != nil {
return fmt.Errorf("listing pods: %w", err)
}
jobs := make(chan v1.Pod, len(allPods.Items))
results := make(chan podLogDumpResult, len(allPods.Items))
for i := 0; i < podLogDumpConcurrency; i++ {
go d.getPodLogs(ctx, jobs, results)
}
var dumpErr error
for _, pod := range allPods.Items {
jobs <- pod
}
close(jobs)
for i := 0; i < len(allPods.Items); i++ {
result := <-results
if result.err != nil {
errors.Join(dumpErr, result.err)
}
}
close(results)
return dumpErr
}
func (d *podLogDumper) getPodLogs(ctx context.Context, pods chan v1.Pod, results chan podLogDumpResult) {
for pod := range pods {
for _, container := range pod.Spec.Containers {
resPath := path.Join(d.artifactsDir, "cluster-info", pod.Namespace, pod.Name, container.Name+".log")
err := os.MkdirAll(path.Dir(resPath), 0755)
if err != nil {
results <- podLogDumpResult{
err: fmt.Errorf("creating directory %q: %w", resPath, err),
}
continue
}
resFile, err := os.Create(resPath)
if err != nil {
results <- podLogDumpResult{
err: fmt.Errorf("creating file %q: %w", resPath, err),
}
continue
}
prevResp, err := d.k8sClient.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{Container: container.Name, Previous: true}).Do(ctx).Raw()
hasPrevious := true
var statusErr *k8sErrors.StatusError
if errors.As(err, &statusErr) {
if statusErr.ErrStatus.Code == 400 {
hasPrevious = false
} else {
results <- podLogDumpResult{
err: fmt.Errorf("getting pod logs for %v/%v: %w", pod.Namespace, pod.Name, err),
}
continue
}
}
resp, err := d.k8sClient.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{Container: container.Name}).Do(ctx).Raw()
if err != nil {
results <- podLogDumpResult{
err: fmt.Errorf("getting pod logs for %v/%v: %w", pod.Namespace, pod.Name, err),
}
continue
}
suffix := fmt.Sprintf("container %v of pod %v/%v ====\n", container.Name, pod.Namespace, pod.Name)
if hasPrevious {
contents := []byte(fmt.Sprintf("==== START logs for PREVIOUS %v", suffix))
contents = append(contents, prevResp...)
contents = append(contents, []byte(fmt.Sprintf("==== END logs for PREVIOUS %v", suffix))...)
_, err = resFile.Write(contents)
if err != nil {
results <- podLogDumpResult{
err: fmt.Errorf("writing pod logs for %v/%v: %w", pod.Namespace, pod.Name, err),
}
continue
}
}
contents := []byte(fmt.Sprintf("==== START logs for CURRENT %v", suffix))
contents = append(contents, resp...)
contents = append(contents, []byte(fmt.Sprintf("==== END logs for CURRENT %v", suffix))...)
_, err = resFile.Write(contents)
if err != nil {
results <- podLogDumpResult{
err: fmt.Errorf("writing pod logs for %v/%v: %w", pod.Namespace, pod.Name, err),
}
continue
}
}
results <- podLogDumpResult{}
}
}