boulder/cmd/cert-checker/main.go

441 lines
14 KiB
Go

package main
import (
"bytes"
"encoding/json"
"flag"
"fmt"
"log/syslog"
"os"
"reflect"
"regexp"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/jmhodges/clock"
"github.com/prometheus/client_golang/prometheus"
"github.com/zmap/zcrypto/x509"
"github.com/zmap/zlint/v2"
"github.com/zmap/zlint/v2/lint"
"github.com/letsencrypt/boulder/cmd"
"github.com/letsencrypt/boulder/core"
"github.com/letsencrypt/boulder/features"
"github.com/letsencrypt/boulder/identifier"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/policy"
"github.com/letsencrypt/boulder/sa"
)
const (
expectedValidityPeriod = time.Hour * 24 * 90
)
// For defense-in-depth in addition to using the PA & its hostnamePolicy to
// check domain names we also perform a check against the regex's from the
// forbiddenDomains array
var forbiddenDomainPatterns = []*regexp.Regexp{
regexp.MustCompile(`^\s*$`),
regexp.MustCompile(`\.local$`),
regexp.MustCompile(`^localhost$`),
regexp.MustCompile(`\.localhost$`),
}
func isForbiddenDomain(name string) (bool, string) {
for _, r := range forbiddenDomainPatterns {
if matches := r.FindAllStringSubmatch(name, -1); len(matches) > 0 {
return true, r.String()
}
}
return false, ""
}
var batchSize = 1000
type report struct {
begin time.Time
end time.Time
GoodCerts int64 `json:"good-certs"`
BadCerts int64 `json:"bad-certs"`
Entries map[string]reportEntry `json:"entries"`
}
func (r *report) dump() error {
content, err := json.MarshalIndent(r, "", " ")
if err != nil {
return err
}
fmt.Fprintln(os.Stdout, string(content))
return nil
}
type reportEntry struct {
Valid bool `json:"valid"`
Problems []string `json:"problems,omitempty"`
}
/*
* certDB is an interface collecting the gorp.DbMap functions that the
* various parts of cert-checker rely on. Using this adapter shim allows tests to
* swap out the dbMap implementation.
*/
type certDB interface {
Select(i interface{}, query string, args ...interface{}) ([]interface{}, error)
SelectInt(query string, args ...interface{}) (int64, error)
}
type certChecker struct {
pa core.PolicyAuthority
dbMap certDB
certs chan core.Certificate
clock clock.Clock
rMu *sync.Mutex
issuedReport report
checkPeriod time.Duration
}
func newChecker(saDbMap certDB, clk clock.Clock, pa core.PolicyAuthority, period time.Duration) certChecker {
c := certChecker{
pa: pa,
dbMap: saDbMap,
certs: make(chan core.Certificate, batchSize),
rMu: new(sync.Mutex),
clock: clk,
checkPeriod: period,
}
c.issuedReport.Entries = make(map[string]reportEntry)
return c
}
func (c *certChecker) getCerts(unexpiredOnly bool) error {
c.issuedReport.end = c.clock.Now()
c.issuedReport.begin = c.issuedReport.end.Add(-c.checkPeriod)
args := map[string]interface{}{"issued": c.issuedReport.begin, "now": 0}
if unexpiredOnly {
now := c.clock.Now()
args["now"] = now
}
count, err := c.dbMap.SelectInt(
"SELECT count(*) FROM certificates WHERE issued >= :issued AND expires >= :now",
args,
)
if err != nil {
return err
}
initialID, err := c.dbMap.SelectInt(
"SELECT id FROM certificates WHERE issued >= :issued AND expires >= :now LIMIT 1",
args,
)
if err != nil {
return err
}
if initialID > 0 {
// decrement the initial ID so that we select below as we aren't using >=
initialID -= 1
}
// Retrieve certs in batches of 1000 (the size of the certificate channel)
// so that we don't eat unnecessary amounts of memory and avoid the 16MB MySQL
// packet limit.
args["limit"] = batchSize
args["id"] = initialID
for offset := 0; offset < int(count); {
certs, err := sa.SelectCertificates(
c.dbMap,
"WHERE id > :id AND expires >= :now ORDER BY id LIMIT :limit",
args,
)
if err != nil {
return err
}
for _, cert := range certs {
c.certs <- cert.Certificate
}
if len(certs) == 0 {
break
}
args["id"] = certs[len(certs)-1].ID
offset += len(certs)
}
// Close channel so range operations won't block once the channel empties out
close(c.certs)
return nil
}
func (c *certChecker) processCerts(wg *sync.WaitGroup, badResultsOnly bool, ignoredLints map[string]bool) {
for cert := range c.certs {
problems := c.checkCert(cert, ignoredLints)
valid := len(problems) == 0
c.rMu.Lock()
if !badResultsOnly || (badResultsOnly && !valid) {
c.issuedReport.Entries[cert.Serial] = reportEntry{
Valid: valid,
Problems: problems,
}
}
c.rMu.Unlock()
if !valid {
atomic.AddInt64(&c.issuedReport.BadCerts, 1)
} else {
atomic.AddInt64(&c.issuedReport.GoodCerts, 1)
}
}
wg.Done()
}
// Extensions that we allow in certificates
var allowedExtensions = map[string]bool{
"1.3.6.1.5.5.7.1.1": true, // Authority info access
"2.5.29.35": true, // Authority key identifier
"2.5.29.19": true, // Basic constraints
"2.5.29.32": true, // Certificate policies
"2.5.29.31": true, // CRL distribution points
"2.5.29.37": true, // Extended key usage
"2.5.29.15": true, // Key usage
"2.5.29.17": true, // Subject alternative name
"2.5.29.14": true, // Subject key identifier
"1.3.6.1.4.1.11129.2.4.2": true, // SCT list
"1.3.6.1.5.5.7.1.24": true, // TLS feature
}
// For extensions that have a fixed value we check that it contains that value
var expectedExtensionContent = map[string][]byte{
"1.3.6.1.5.5.7.1.24": {0x30, 0x03, 0x02, 0x01, 0x05}, // Must staple feature
}
func (c *certChecker) checkCert(cert core.Certificate, ignoredLints map[string]bool) (problems []string) {
// Check digests match
if cert.Digest != core.Fingerprint256(cert.DER) {
problems = append(problems, "Stored digest doesn't match certificate digest")
}
// Parse certificate
parsedCert, err := x509.ParseCertificate(cert.DER)
if err != nil {
problems = append(problems, fmt.Sprintf("Couldn't parse stored certificate: %s", err))
} else {
// Run zlint checks
results := zlint.LintCertificate(parsedCert)
for name, res := range results.Results {
if ignoredLints[name] || res.Status <= lint.Pass {
continue
}
prob := fmt.Sprintf("zlint %s: %s", res.Status, name)
if res.Details != "" {
prob = fmt.Sprintf("%s %s", prob, res.Details)
}
problems = append(problems, prob)
}
// Check stored serial is correct
storedSerial, err := core.StringToSerial(cert.Serial)
if err != nil {
problems = append(problems, "Stored serial is invalid")
} else if parsedCert.SerialNumber.Cmp(storedSerial) != 0 {
problems = append(problems, "Stored serial doesn't match certificate serial")
}
// Check we have the right expiration time
if !parsedCert.NotAfter.Equal(cert.Expires) {
problems = append(problems, "Stored expiration doesn't match certificate NotAfter")
}
// Check basic constraints are set
if !parsedCert.BasicConstraintsValid {
problems = append(problems, "Certificate doesn't have basic constraints set")
}
// Check the cert isn't able to sign other certificates
if parsedCert.IsCA {
problems = append(problems, "Certificate can sign other certificates")
}
// Check the cert has the correct validity period
validityPeriod := parsedCert.NotAfter.Sub(parsedCert.NotBefore)
if validityPeriod > expectedValidityPeriod {
problems = append(problems, fmt.Sprintf("Certificate has a validity period longer than %s", expectedValidityPeriod))
} else if validityPeriod < expectedValidityPeriod {
problems = append(problems, fmt.Sprintf("Certificate has a validity period shorter than %s", expectedValidityPeriod))
}
// Check the stored issuance time isn't too far back/forward dated
if parsedCert.NotBefore.Before(cert.Issued.Add(-6*time.Hour)) || parsedCert.NotBefore.After(cert.Issued.Add(6*time.Hour)) {
problems = append(problems, "Stored issuance date is outside of 6 hour window of certificate NotBefore")
}
// Check CommonName is <= 64 characters
if len(parsedCert.Subject.CommonName) > 64 {
problems = append(
problems,
fmt.Sprintf("Certificate has common name >64 characters long (%d)", len(parsedCert.Subject.CommonName)),
)
}
// Check that the PA is still willing to issue for each name in DNSNames + CommonName
for _, name := range append(parsedCert.DNSNames, parsedCert.Subject.CommonName) {
id := identifier.ACMEIdentifier{Type: identifier.DNS, Value: name}
// TODO(https://github.com/letsencrypt/boulder/issues/3371): Distinguish
// between certificates issued by v1 and v2 API.
if err = c.pa.WillingToIssueWildcards([]identifier.ACMEIdentifier{id}); err != nil {
problems = append(problems, fmt.Sprintf("Policy Authority isn't willing to issue for '%s': %s", name, err))
} else {
// For defense-in-depth, even if the PA was willing to issue for a name
// we double check it against a list of forbidden domains. This way even
// if the hostnamePolicyFile malfunctions we will flag the forbidden
// domain matches
if forbidden, pattern := isForbiddenDomain(name); forbidden {
problems = append(problems, fmt.Sprintf(
"Policy Authority was willing to issue but domain '%s' matches "+
"forbiddenDomains entry %q", name, pattern))
}
}
}
// Check the cert has the correct key usage extensions
if !reflect.DeepEqual(parsedCert.ExtKeyUsage, []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageClientAuth}) {
problems = append(problems, "Certificate has incorrect key usage extensions")
}
for _, ext := range parsedCert.Extensions {
if _, ok := allowedExtensions[ext.Id.String()]; !ok {
problems = append(problems, fmt.Sprintf("Certificate contains an unexpected extension: %s", ext.Id))
}
if expectedContent, ok := expectedExtensionContent[ext.Id.String()]; ok {
if !bytes.Equal(ext.Value, expectedContent) {
problems = append(problems, fmt.Sprintf("Certificate extension %s contains unexpected content: has %x, expected %x", ext.Id, ext.Value, expectedContent))
}
}
}
}
return problems
}
type config struct {
CertChecker struct {
cmd.DBConfig
cmd.HostnamePolicyConfig
Workers int
ReportDirectoryPath string
UnexpiredOnly bool
BadResultsOnly bool
CheckPeriod cmd.ConfigDuration
// IgnoredLints is a list of zlint names. Any lint results from a lint in
// the IgnoredLists list are ignored regardless of LintStatus level.
IgnoredLints []string
Features map[string]bool
}
PA cmd.PAConfig
Syslog cmd.SyslogConfig
}
func main() {
configFile := flag.String("config", "", "File path to the configuration file for this service")
workers := flag.Int("workers", runtime.NumCPU(), "The number of concurrent workers used to process certificates")
badResultsOnly := flag.Bool("bad-results-only", false, "Only collect and display bad results")
connect := flag.String("db-connect", "", "SQL URI if not provided in the configuration file")
cp := flag.Duration("check-period", time.Hour*2160, "How far back to check")
unexpiredOnly := flag.Bool("unexpired-only", false, "Only check currently unexpired certificates")
flag.Parse()
if *configFile == "" {
flag.Usage()
os.Exit(1)
}
var config config
err := cmd.ReadConfigFile(*configFile, &config)
cmd.FailOnError(err, "Reading JSON config file into config structure")
err = features.Set(config.CertChecker.Features)
cmd.FailOnError(err, "Failed to set feature flags")
syslogger, err := syslog.Dial("", "", syslog.LOG_INFO|syslog.LOG_LOCAL0, "")
cmd.FailOnError(err, "Failed to dial syslog")
logger, err := blog.New(syslogger, 0, 0)
cmd.FailOnError(err, "Failed to construct logger")
err = blog.Set(logger)
cmd.FailOnError(err, "Failed to set audit logger")
if *connect != "" {
config.CertChecker.DBConnect = *connect
}
if *workers != 0 {
config.CertChecker.Workers = *workers
}
config.CertChecker.UnexpiredOnly = *unexpiredOnly
config.CertChecker.BadResultsOnly = *badResultsOnly
config.CertChecker.CheckPeriod.Duration = *cp
// Validate PA config and set defaults if needed
cmd.FailOnError(config.PA.CheckChallenges(), "Invalid PA configuration")
saDbURL, err := config.CertChecker.DBConfig.URL()
cmd.FailOnError(err, "Couldn't load DB URL")
dbSettings := sa.DbSettings{
MaxOpenConns: config.CertChecker.DBConfig.GetMaxOpenConns(),
MaxIdleConns: config.CertChecker.DBConfig.MaxIdleConns,
ConnMaxLifetime: config.CertChecker.DBConfig.ConnMaxLifetime.Duration,
ConnMaxIdleTime: config.CertChecker.DBConfig.ConnMaxIdleTime.Duration,
}
saDbMap, err := sa.NewDbMap(saDbURL, dbSettings)
cmd.FailOnError(err, "Could not connect to database")
sa.InitDBMetrics(saDbMap, prometheus.DefaultRegisterer, dbSettings)
checkerLatency := prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "cert_checker_latency",
Help: "Histogram of latencies a cert-checker worker takes to complete a batch",
})
prometheus.DefaultRegisterer.MustRegister(checkerLatency)
pa, err := policy.New(config.PA.Challenges)
cmd.FailOnError(err, "Failed to create PA")
err = pa.SetHostnamePolicyFile(config.CertChecker.HostnamePolicyFile)
cmd.FailOnError(err, "Failed to load HostnamePolicyFile")
checker := newChecker(
saDbMap,
cmd.Clock(),
pa,
config.CertChecker.CheckPeriod.Duration,
)
fmt.Fprintf(os.Stderr, "# Getting certificates issued in the last %s\n", config.CertChecker.CheckPeriod)
ignoredLintsMap := make(map[string]bool)
for _, name := range config.CertChecker.IgnoredLints {
ignoredLintsMap[name] = true
}
// Since we grab certificates in batches we don't want this to block, when it
// is finished it will close the certificate channel which allows the range
// loops in checker.processCerts to break
go func() {
err := checker.getCerts(config.CertChecker.UnexpiredOnly)
cmd.FailOnError(err, "Batch retrieval of certificates failed")
}()
fmt.Fprintf(os.Stderr, "# Processing certificates using %d workers\n", config.CertChecker.Workers)
wg := new(sync.WaitGroup)
for i := 0; i < config.CertChecker.Workers; i++ {
wg.Add(1)
go func() {
s := checker.clock.Now()
checker.processCerts(wg, config.CertChecker.BadResultsOnly, ignoredLintsMap)
checkerLatency.Observe(checker.clock.Since(s).Seconds())
}()
}
wg.Wait()
fmt.Fprintf(
os.Stderr,
"# Finished processing certificates, sample: %d, good: %d, bad: %d\n",
len(checker.issuedReport.Entries),
checker.issuedReport.GoodCerts,
checker.issuedReport.BadCerts,
)
err = checker.issuedReport.dump()
cmd.FailOnError(err, "Failed to dump results: %s\n")
}