boulder/cmd/log-validator/main.go

226 lines
6.3 KiB
Go

package notmain
import (
"encoding/base64"
"encoding/json"
"errors"
"flag"
"fmt"
"io/ioutil"
"os"
"strings"
"time"
"github.com/honeycombio/beeline-go"
"github.com/hpcloud/tail"
"github.com/prometheus/client_golang/prometheus"
"github.com/letsencrypt/boulder/cmd"
blog "github.com/letsencrypt/boulder/log"
)
var errInvalidChecksum = errors.New("invalid checksum length")
func lineValid(text string) error {
// Line format should match the following rsyslog omfile template:
//
// template( name="LELogFormat" type="list" ) {
// property(name="timereported" dateFormat="rfc3339")
// constant(value=" ")
// property(name="hostname" field.delimiter="46" field.number="1")
// constant(value=" datacenter ")
// property(name="syslogseverity")
// constant(value=" ")
// property(name="syslogtag")
// property(name="msg" spifno1stsp="on" )
// property(name="msg" droplastlf="on" )
// constant(value="\n")
// }
//
// This should result in a log line that looks like this:
// timestamp hostname datacenter syslogseverity binary-name[pid]: checksum msg
fields := strings.Split(text, " ")
const errorPrefix = "log-validator:"
// Extract checksum from line
if len(fields) < 6 {
return fmt.Errorf("%s line doesn't match expected format", errorPrefix)
}
checksum := fields[5]
_, err := base64.RawURLEncoding.DecodeString(checksum)
if err != nil || len(checksum) != 7 {
return fmt.Errorf(
"%s expected a 7 character base64 raw URL decodable string, got %q: %w",
errorPrefix,
checksum,
errInvalidChecksum,
)
}
// Reconstruct just the message portion of the line
line := strings.Join(fields[6:], " ")
// If we are fed our own output, treat it as always valid. This
// prevents runaway scenarios where we generate ever-longer output.
if strings.Contains(text, errorPrefix) {
return nil
}
// Check the extracted checksum against the computed checksum
if computedChecksum := blog.LogLineChecksum(line); checksum != computedChecksum {
return fmt.Errorf("%s invalid checksum (expected %q, got %q)", errorPrefix, computedChecksum, checksum)
}
return nil
}
func validateFile(filename string) error {
file, err := ioutil.ReadFile(filename)
if err != nil {
return err
}
badFile := false
for i, line := range strings.Split(string(file), "\n") {
if line == "" {
continue
}
if err := lineValid(line); err != nil {
badFile = true
fmt.Fprintf(os.Stderr, "[line %d] %s: %s\n", i+1, err, line)
}
}
if badFile {
return errors.New("file contained invalid lines")
}
return nil
}
// tailLogger is an adapter to the hpcloud/tail module's logging interface.
type tailLogger struct {
blog.Logger
}
func (tl tailLogger) Fatal(v ...interface{}) {
tl.AuditErr(fmt.Sprint(v...))
}
func (tl tailLogger) Fatalf(format string, v ...interface{}) {
tl.AuditErrf(format, v...)
}
func (tl tailLogger) Fatalln(v ...interface{}) {
tl.AuditErr(fmt.Sprint(v...) + "\n")
}
func (tl tailLogger) Panic(v ...interface{}) {
tl.AuditErr(fmt.Sprint(v...))
}
func (tl tailLogger) Panicf(format string, v ...interface{}) {
tl.AuditErrf(format, v...)
}
func (tl tailLogger) Panicln(v ...interface{}) {
tl.AuditErr(fmt.Sprint(v...) + "\n")
}
func (tl tailLogger) Print(v ...interface{}) {
tl.Info(fmt.Sprint(v...))
}
func (tl tailLogger) Printf(format string, v ...interface{}) {
tl.Infof(format, v...)
}
func (tl tailLogger) Println(v ...interface{}) {
tl.Info(fmt.Sprint(v...) + "\n")
}
type Config struct {
Files []string
DebugAddr string
Syslog cmd.SyslogConfig
Beeline cmd.BeelineConfig
}
func main() {
configPath := flag.String("config", "", "File path to the configuration file for this service")
checkFile := flag.String("check-file", "", "File path to a file to directly validate, if this argument is provided the config will not be parsed and only this file will be inspected")
flag.Parse()
if *checkFile != "" {
err := validateFile(*checkFile)
cmd.FailOnError(err, "validation failed")
return
}
configBytes, err := ioutil.ReadFile(*configPath)
cmd.FailOnError(err, "failed to read config file")
var config Config
err = json.Unmarshal(configBytes, &config)
cmd.FailOnError(err, "failed to parse config file")
bc, err := config.Beeline.Load()
cmd.FailOnError(err, "Failed to load Beeline config")
beeline.Init(bc)
defer beeline.Close()
stats, logger := cmd.StatsAndLogging(config.Syslog, config.DebugAddr)
lineCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "log_lines",
Help: "A counter of log lines processed, with status",
}, []string{"filename", "status"})
stats.MustRegister(lineCounter)
// Emit no more than 1 error line per second. This prevents consuming large
// amounts of disk space in case there is problem that causes all log lines to
// be invalid.
outputLimiter := time.NewTicker(time.Second)
var tailers []*tail.Tail
for _, filename := range config.Files {
t, err := tail.TailFile(filename, tail.Config{
ReOpen: true,
MustExist: false, // sometimes files won't exist, so we must tolerate that
Follow: true,
Logger: tailLogger{logger},
})
cmd.FailOnError(err, "failed to tail file")
go func() {
for line := range t.Lines {
if line.Err != nil {
logger.Errf("error while tailing %s: %s", t.Filename, err)
continue
}
if err := lineValid(line.Text); err != nil {
if errors.Is(err, errInvalidChecksum) {
lineCounter.WithLabelValues(t.Filename, "invalid checksum length").Inc()
} else {
lineCounter.WithLabelValues(t.Filename, "bad").Inc()
}
select {
case <-outputLimiter.C:
logger.Errf("%s: %s %q", t.Filename, err, line.Text)
default:
}
} else {
lineCounter.WithLabelValues(t.Filename, "ok").Inc()
}
}
}()
tailers = append(tailers, t)
}
cmd.CatchSignals(logger, func() {
for _, t := range tailers {
// The tail module seems to have a race condition that will generate
// errors like this on shutdown:
// failed to stop tailing file: <filename>: Failed to detect creation of
// <filename>: inotify watcher has been closed
// This is probably related to the module's shutdown logic triggering the
// "reopen" code path for files that are removed and then recreated.
// These errors are harmless so we ignore them to allow clean shutdown.
_ = t.Stop()
t.Cleanup()
}
})
}
func init() {
cmd.RegisterCommand("log-validator", main)
}