apimachinery runtime: support contextual logging

In contrast to the original HandleError and HandleCrash, the new
HandleErrorWithContext and HandleCrashWithContext functions do contextual
logging properly: if a problem occurs while e.g. handling a certain request and
WithValues was used for that request, then the error log entry also contains
that information.
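
For illustration only (nothing below is added by this commit): a caller that
stores per-request values in the context via klog could use the new entry
points roughly like this. The function names and keys are made up, and the
HandleCrashWithContext call is a sketch; only the HandleErrorWithContext call
shape is taken from the diff below.

    package example

    import (
        "context"
        "errors"

        utilruntime "k8s.io/apimachinery/pkg/util/runtime"
        "k8s.io/klog/v2"
    )

    func handleRequest(ctx context.Context, requestID string) {
        // Attach the request ID to the logger that travels in the context.
        logger := klog.FromContext(ctx).WithValues("requestID", requestID)
        ctx = klog.NewContext(ctx, logger)

        // A panic below gets logged together with requestID.
        defer utilruntime.HandleCrashWithContext(ctx)

        if err := doWork(ctx); err != nil {
            // The error log entry also carries requestID from the context.
            utilruntime.HandleErrorWithContext(ctx, err, "Request processing failed", "step", "doWork")
        }
    }

    func doWork(ctx context.Context) error { return errors.New("example failure") }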

The output changes from unstructured to structured, which might be a breaking
change for users who grep for panics. Care was taken to format panics as
similarly as possible to the original output.

For errors, a message string gets added. There was none before, which made it
impossible to find all error output coming from HandleError.

Keeping HandleError and HandleCrash around without deprecating them, while
changing the signature of their callbacks, is a compromise between not breaking
existing code and not adding too many special cases that need to be supported.
There is some code which uses PanicHandlers or ErrorHandlers, but less than the
code that uses the Handle* calls.
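
For example (an illustrative sketch, not part of this change): a custom handler
appended to runtime.PanicHandlers now receives the context as its first
argument. The handler body and key names below are made up.

    package example

    import (
        "context"

        utilruntime "k8s.io/apimachinery/pkg/util/runtime"
        "k8s.io/klog/v2"
    )

    func init() {
        // Panic handlers now take the context first, so they can log
        // contextually as well.
        utilruntime.PanicHandlers = append(utilruntime.PanicHandlers,
            func(ctx context.Context, r interface{}) {
                klog.FromContext(ctx).Info("Custom panic bookkeeping", "panic", r)
            },
        )
    }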

In Kubernetes, we want to replace these calls. logcheck warns about them in
code that is supposed to be contextual. The steps towards that are:
- add TODO remarks as reminder (this commit); see the sketch after this list
- locally remove " TODO(pohly): " to enable the check via `//logcheck:context`,
  then merge fixes for the resulting linter warnings
- once there are no warnings left, remove the TODO to enable the check permanently
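
A rough sketch of such a reminder on the classic entry point; the exact comment
wording in the runtime package may differ. Dropping the " TODO(pohly): " prefix
turns the line into an active //logcheck:context directive:

    // TODO(pohly): logcheck:context // HandleErrorWithContext should be used instead of HandleError in code which supports contextual logging.
    func HandleError(err error) {
        // ... unchanged, non-contextual implementation ...
    }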

Kubernetes-commit: 5a130d2b71e5d70cfff15087f4d521c6b68fb01e
Author: Patrick Ohly, 2023-11-20 20:25:00 +01:00 (committed by Kubernetes Publisher)
Commit: d712a4ee7e, parent ba6e7aabe5
4 changed files with 9 additions and 8 deletions


@@ -137,9 +137,7 @@ func (t *timeoutHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 				}
 				metrics.RecordRequestPostTimeout(metrics.PostTimeoutSourceTimeoutHandler, status)
-				err := fmt.Errorf("post-timeout activity - time-elapsed: %s, %v %q result: %v",
-					time.Since(timedOutAt), r.Method, r.URL.Path, res)
-				utilruntime.HandleError(err)
+				utilruntime.HandleErrorWithContext(r.Context(), nil, "Post-timeout activity", "timeElapsed", time.Since(timedOutAt), "method", r.Method, "path", r.URL.Path, "result", res)
 			}()
 		}()
 
 		httplog.SetStacktracePredicate(r.Context(), func(status int) bool {


@@ -408,7 +408,9 @@ func TestErrConnKilled(t *testing.T) {
 	if isStackTraceLoggedByRuntime(capturedOutput) {
 		t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput)
 	}
-	if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) {
+	// For the sake of simplicity and clarity this matches the full log line.
+	// This is not part of the Kubernetes API and could change.
+	if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) {
 		t.Errorf("unexpected output captured actual = %v", capturedOutput)
 	}
 }
@@ -501,7 +503,9 @@ func TestErrConnKilledHTTP2(t *testing.T) {
 	if isStackTraceLoggedByRuntime(capturedOutput) {
 		t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput)
 	}
-	if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) {
+	// For the sake of simplicity and clarity this matches the full log line.
+	// This is not part of the Kubernetes API and could change.
+	if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) {
 		t.Errorf("unexpected output captured actual = %v", capturedOutput)
 	}
 

@@ -17,7 +17,6 @@ limitations under the License.
 package filters
 
 import (
-	"fmt"
 	"net/http"
 
 	"k8s.io/apimachinery/pkg/util/runtime"
@@ -51,7 +50,7 @@ func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolver
 			// This call can have different handlers, but the default chain rate limits. Call it after the metrics are updated
 			// in case the rate limit delays it. If you outrun the rate for this one timed out requests, something has gone
 			// seriously wrong with your server, but generally having a logging signal for timeouts is useful.
-			runtime.HandleError(fmt.Errorf("timeout or abort while handling: method=%v URI=%q audit-ID=%q", req.Method, req.RequestURI, audit.GetAuditIDTruncated(req.Context())))
+			runtime.HandleErrorWithContext(req.Context(), nil, "Timeout or abort while handling", "method", req.Method, "URI", req.RequestURI, "auditID", audit.GetAuditIDTruncated(req.Context()))
 			return
 		}
 		http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError)


@@ -79,7 +79,7 @@ func webSocketServerStreams(req *http.Request, w http.ResponseWriter, opts Options
 				if p := recover(); p != nil {
 					// Standard panic logging.
 					for _, fn := range runtime.PanicHandlers {
-						fn(p)
+						fn(req.Context(), p)
 					}
 				}
 			}()