opentelemetry-collector/component/componentstatus/status.go

161 lines
5.7 KiB
Go

// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
// Package componentstatus is an experimental module that defines how components should
// report health statues, how collector hosts should facilitate component status reporting,
// and how extensions should watch for new component statuses.
//
// This package is currently under development and is exempt from the Collector SIG's
// breaking change policy.
package componentstatus // import "go.opentelemetry.io/collector/component/componentstatus"
import (
"time"
"go.opentelemetry.io/collector/component"
)
// Reporter is an extra interface for `component.Host` implementations.
// A Reporter defines how to report a `componentstatus.Event`.
type Reporter interface {
// Report allows a component to report runtime changes in status. The service
// will automatically report status for a component during startup and shutdown. Components can
// use this method to report status after start and before shutdown. For more details about
// component status reporting see: https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-status.md
Report(*Event)
}
// Watcher is an extra interface for Extension hosted by the OpenTelemetry
// Collector that is to be implemented by extensions interested in changes to component
// status.
//
// TODO: consider moving this interface to a new package/module like `extension/statuswatcher`
// https://github.com/open-telemetry/opentelemetry-collector/issues/10764
type Watcher interface {
// ComponentStatusChanged notifies about a change in the source component status.
// Extensions that implement this interface must be ready that the ComponentStatusChanged
// may be called before, after or concurrently with calls to Component.Start() and Component.Shutdown().
// The function may be called concurrently with itself.
ComponentStatusChanged(source *InstanceID, event *Event)
}
type Status int32
// Enumeration of possible component statuses
const (
// StatusNone indicates absence of component status.
StatusNone Status = iota
// StatusStarting indicates the component is starting.
StatusStarting
// StatusOK indicates the component is running without issues.
StatusOK
// StatusRecoverableError indicates that the component has experienced a transient error and may recover.
StatusRecoverableError
// StatusPermanentError indicates that the component has detected a condition at runtime that will need human intervention to fix. The collector will continue to run in a degraded mode.
StatusPermanentError
// StatusFatalError indicates that the collector has experienced a fatal runtime error and will shut down.
StatusFatalError
// StatusStopping indicates that the component is in the process of shutting down.
StatusStopping
// StatusStopped indicates that the component has completed shutdown.
StatusStopped
)
// String returns a string representation of a Status
func (s Status) String() string {
switch s {
case StatusStarting:
return "StatusStarting"
case StatusOK:
return "StatusOK"
case StatusRecoverableError:
return "StatusRecoverableError"
case StatusPermanentError:
return "StatusPermanentError"
case StatusFatalError:
return "StatusFatalError"
case StatusStopping:
return "StatusStopping"
case StatusStopped:
return "StatusStopped"
}
return "StatusNone"
}
// Event contains a status and timestamp, and can contain an error
type Event struct {
status Status
err error
// TODO: consider if a timestamp is necessary in the default Event struct or is needed only for the healthcheckv2 extension
// https://github.com/open-telemetry/opentelemetry-collector/issues/10763
timestamp time.Time
}
// Status returns the Status (enum) associated with the Event
func (ev *Event) Status() Status {
return ev.status
}
// Err returns the error associated with the Event.
func (ev *Event) Err() error {
return ev.err
}
// Timestamp returns the timestamp associated with the Event
func (ev *Event) Timestamp() time.Time {
return ev.timestamp
}
// NewEvent creates and returns a Event with the specified status and sets the timestamp
// time.Now(). To set an error on the event for an error status use one of the dedicated
// constructors (e.g. NewRecoverableErrorEvent, NewPermanentErrorEvent, NewFatalErrorEvent)
func NewEvent(status Status) *Event {
return &Event{
status: status,
timestamp: time.Now(),
}
}
// NewRecoverableErrorEvent wraps a transient error
// passed as argument as a Event with a status StatusRecoverableError
// and a timestamp set to time.Now().
func NewRecoverableErrorEvent(err error) *Event {
ev := NewEvent(StatusRecoverableError)
ev.err = err
return ev
}
// NewPermanentErrorEvent wraps an error requiring human intervention to fix
// passed as argument as a Event with a status StatusPermanentError
// and a timestamp set to time.Now().
func NewPermanentErrorEvent(err error) *Event {
ev := NewEvent(StatusPermanentError)
ev.err = err
return ev
}
// NewFatalErrorEvent wraps the fatal runtime error passed as argument as a Event
// with a status StatusFatalError and a timestamp set to time.Now().
func NewFatalErrorEvent(err error) *Event {
ev := NewEvent(StatusFatalError)
ev.err = err
return ev
}
// StatusIsError returns true for error statuses (e.g. StatusRecoverableError,
// StatusPermanentError, or StatusFatalError)
func StatusIsError(status Status) bool {
return status == StatusRecoverableError ||
status == StatusPermanentError ||
status == StatusFatalError
}
// ReportStatus is a helper function that handles checking if the component.Host has implemented Reporter.
// If it has, the Event is reported. Otherwise, nothing happens.
func ReportStatus(host component.Host, e *Event) {
statusReporter, ok := host.(Reporter)
if ok {
statusReporter.Report(e)
}
}