Make log-validator take glob patterns to monitor for log files (#7172)
To simplify deployment of the log validator, this allows wildcards (using Go's filepath.Glob) to be included in the file paths. To detect new files, a background goroutine polls the glob patterns every minute for matches. Because the "monitor" function runs in its own goroutine, a lock is needed to ensure it is not adding new tailers while shutdown is in progress.
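A minimal sketch of the pattern the commit message describes, not the actual Boulder code (which follows in the diff below): a goroutine expands glob patterns with filepath.Glob once a minute and starts tailing newly matched files, holding a mutex so a concurrent shutdown cannot race the tailer map. The watcher type and startTailing helper here are hypothetical stand-ins.

package main

import (
	"context"
	"fmt"
	"path/filepath"
	"sync"
	"time"
)

// watcher tracks which matched files are already being tailed.
type watcher struct {
	mu       sync.Mutex          // guards tailed against concurrent poll/shutdown
	patterns []string            // glob patterns, e.g. "/var/log/boulder-*.log"
	tailed   map[string]struct{} // files we have already started tailing
}

// poll expands every pattern and starts tailing any file not seen before.
func (w *watcher) poll() {
	w.mu.Lock()
	defer w.mu.Unlock()
	for _, pattern := range w.patterns {
		paths, err := filepath.Glob(pattern)
		if err != nil {
			continue // Glob errors only on a malformed pattern
		}
		for _, path := range paths {
			if _, ok := w.tailed[path]; ok {
				continue // already tailing this file
			}
			w.tailed[path] = struct{}{}
			startTailing(path) // hypothetical: spawn a real tailer here
		}
	}
}

// monitor re-polls once a minute until its context is cancelled.
func (w *watcher) monitor(ctx context.Context) {
	for {
		w.poll()
		select {
		case <-ctx.Done():
			return
		case <-time.After(time.Minute):
		}
	}
}

// startTailing stands in for starting a tail goroutine on one file.
func startTailing(path string) { fmt.Println("tailing", path) }

func main() {
	w := &watcher{
		patterns: []string{"/var/log/boulder-*.log", "/var/log/crl-*.log"},
		tailed:   map[string]struct{}{},
	}
	ctx, cancel := context.WithCancel(context.Background())
	go w.monitor(ctx)

	// On shutdown: take the lock so monitor is not mid-poll, then cancel it.
	defer func() {
		w.mu.Lock()
		defer w.mu.Unlock()
		cancel()
	}()
	time.Sleep(100 * time.Millisecond) // let the first poll run
}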
This commit is contained in:
parent a31429fb3e
commit 32adaf1846
@@ -38,14 +38,9 @@ func main() {
 	defer oTelShutdown(context.Background())
 	logger.Info(cmd.VersionString())
 
-	v := validator.New(logger, stats)
+	v := validator.New(config.Files, logger, stats)
 	defer v.Shutdown()
 
-	for _, filename := range config.Files {
-		err := v.TailValidateFile(filename)
-		cmd.FailOnError(err, "failed to tail file")
-	}
-
 	cmd.WaitForSignal()
 }
 
@@ -6,7 +6,7 @@ import (
 	"github.com/letsencrypt/boulder/log"
 )
 
-// tailLogger is an adapter to the hpcloud/tail module's logging interface.
+// tailLogger is an adapter to the nxadm/tail module's logging interface.
 type tailLogger struct {
 	log.Logger
 }
@@ -1,11 +1,14 @@
 package validator
 
 import (
+	"context"
 	"encoding/base64"
 	"errors"
 	"fmt"
 	"os"
+	"path/filepath"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/nxadm/tail"
@@ -17,73 +20,132 @@ import (
 var errInvalidChecksum = errors.New("invalid checksum length")
 
 type Validator struct {
-	tailers     []*tail.Tail
+	// mu guards patterns and tailers to prevent Shutdown racing monitor
+	mu sync.Mutex
+
+	// patterns is the list of glob patterns to monitor with filepath.Glob for logs
+	patterns []string
+
+	// tailers is a map of filenames to the tailer which are currently being tailed
+	tailers map[string]*tail.Tail
+
+	// monitorCancel cancels the monitor's context, so it exits
+	monitorCancel context.CancelFunc
+
 	lineCounter *prometheus.CounterVec
 	log         log.Logger
 }
 
-func New(logger log.Logger, stats prometheus.Registerer) *Validator {
+// New Validator monitoring paths, which is a list of file globs.
+func New(patterns []string, logger log.Logger, stats prometheus.Registerer) *Validator {
 	lineCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
 		Name: "log_lines",
 		Help: "A counter of log lines processed, with status",
 	}, []string{"filename", "status"})
 	stats.MustRegister(lineCounter)
 
-	return &Validator{log: logger, lineCounter: lineCounter}
-}
+	monitorContext, monitorCancel := context.WithCancel(context.Background())
 
-// TailValidateFile takes a filename, and starts tailing it.
-// An error is returned if the file couldn't be opened.
-func (v *Validator) TailValidateFile(filename string) error {
-	t, err := tail.TailFile(filename, tail.Config{
-		ReOpen:        true,
-		MustExist:     false, // sometimes files won't exist, so we must tolerate that
-		Follow:        true,
-		Logger:        tailLogger{v.log},
-		CompleteLines: true,
-	})
-	if err != nil {
-		return err
+	v := &Validator{
+		patterns:      patterns,
+		tailers:       map[string]*tail.Tail{},
+		log:           logger,
+		monitorCancel: monitorCancel,
+		lineCounter:   lineCounter,
 	}
 
-	go func() {
-		// Emit no more than 1 error line per second. This prevents consuming large
-		// amounts of disk space in case there is problem that causes all log lines to
-		// be invalid.
-		outputLimiter := time.NewTicker(time.Second)
-		defer outputLimiter.Stop()
+	go v.monitor(monitorContext)
 
-		for line := range t.Lines {
-			if line.Err != nil {
-				v.log.Errf("error while tailing %s: %s", t.Filename, line.Err)
-				continue
-			}
-			err := lineValid(line.Text)
-			if err != nil {
-				if errors.Is(err, errInvalidChecksum) {
-					v.lineCounter.WithLabelValues(t.Filename, "invalid checksum length").Inc()
-				} else {
-					v.lineCounter.WithLabelValues(t.Filename, "bad").Inc()
-				}
-				select {
-				case <-outputLimiter.C:
-					v.log.Errf("%s: %s %q", t.Filename, err, line.Text)
-				default:
-				}
-			} else {
-				v.lineCounter.WithLabelValues(t.Filename, "ok").Inc()
-			}
-		}
-	}()
+	return v
+}
 
-	v.tailers = append(v.tailers, t)
+// pollPaths expands v.patterns and calls v.tailValidateFile on each resulting file
+func (v *Validator) pollPaths() {
+	v.mu.Lock()
+	defer v.mu.Unlock()
+	for _, pattern := range v.patterns {
+		paths, err := filepath.Glob(pattern)
+		if err != nil {
+			v.log.Err(err.Error())
+		}
 
-	return nil
+		for _, path := range paths {
+			if _, ok := v.tailers[path]; ok {
+				// We are already tailing this file
+				continue
+			}
+
+			t, err := tail.TailFile(path, tail.Config{
+				ReOpen:        true,
+				MustExist:     false, // sometimes files won't exist, so we must tolerate that
+				Follow:        true,
+				Logger:        tailLogger{v.log},
+				CompleteLines: true,
+			})
+			if err != nil {
+				// TailFile shouldn't error when MustExist is false
+				v.log.Errf("unexpected error from TailFile: %v", err)
+			}
+
+			go v.tailValidate(path, t.Lines)
+
+			v.tailers[path] = t
+		}
+	}
+}
+
+// Monitor calls v.pollPaths every minute until its context is cancelled
+func (v *Validator) monitor(ctx context.Context) {
+	for {
+		v.pollPaths()
+
+		// Wait a minute, unless cancelled
+		timer := time.NewTimer(time.Minute)
+		select {
+		case <-ctx.Done():
+			return
+		case <-timer.C:
+		}
+	}
+}
+
+func (v *Validator) tailValidate(filename string, lines chan *tail.Line) {
+	// Emit no more than 1 error line per second. This prevents consuming large
+	// amounts of disk space in case there is problem that causes all log lines to
+	// be invalid.
+	outputLimiter := time.NewTicker(time.Second)
+	defer outputLimiter.Stop()
+
+	for line := range lines {
+		if line.Err != nil {
+			v.log.Errf("error while tailing %s: %s", filename, line.Err)
+			continue
+		}
+		err := lineValid(line.Text)
+		if err != nil {
+			if errors.Is(err, errInvalidChecksum) {
+				v.lineCounter.WithLabelValues(filename, "invalid checksum length").Inc()
+			} else {
+				v.lineCounter.WithLabelValues(filename, "bad").Inc()
+			}
+			select {
+			case <-outputLimiter.C:
+				v.log.Errf("%s: %s %q", filename, err, line.Text)
+			default:
+			}
+		} else {
+			v.lineCounter.WithLabelValues(filename, "ok").Inc()
+		}
+	}
 }
 
 // Shutdown should be called before process shutdown
 func (v *Validator) Shutdown() {
+	v.mu.Lock()
+	defer v.mu.Unlock()
+
+	v.monitorCancel()
+
 	for _, t := range v.tailers {
 		// The tail module seems to have a race condition that will generate
 		// errors like this on shutdown:
@@ -10,16 +10,8 @@
 	"files": [
 		"/var/log/akamai-purger.log",
 		"/var/log/bad-key-revoker.log",
-		"/var/log/boulder-ca.log",
-		"/var/log/boulder-observer.log",
-		"/var/log/boulder-publisher.log",
-		"/var/log/boulder-ra.log",
-		"/var/log/boulder-remoteva.log",
-		"/var/log/boulder-sa.log",
-		"/var/log/boulder-va.log",
-		"/var/log/boulder-wfe2.log",
-		"/var/log/crl-storer.log",
-		"/var/log/crl-updater.log",
+		"/var/log/boulder-*.log",
+		"/var/log/crl-*.log",
 		"/var/log/nonce-service.log",
 		"/var/log/ocsp-responder.log"
 	]