boulder/cmd/expiration-mailer/main.go

package notmain
import (
"bytes"
"context"
"crypto/x509"
"encoding/json"
"errors"
"flag"
"fmt"
"math"
netmail "net/mail"
"net/url"
"os"
"sort"
"strings"
"sync"
"text/template"
"time"
"github.com/honeycombio/beeline-go"
"github.com/jmhodges/clock"
"google.golang.org/grpc"
"github.com/letsencrypt/boulder/cmd"
"github.com/letsencrypt/boulder/core"
corepb "github.com/letsencrypt/boulder/core/proto"
"github.com/letsencrypt/boulder/db"
"github.com/letsencrypt/boulder/features"
bgrpc "github.com/letsencrypt/boulder/grpc"
blog "github.com/letsencrypt/boulder/log"
bmail "github.com/letsencrypt/boulder/mail"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/sa"
sapb "github.com/letsencrypt/boulder/sa/proto"
"github.com/prometheus/client_golang/prometheus"
)
const (
defaultExpirationSubject = "Let's Encrypt certificate expiration notice for domain {{.ExpirationSubject}}"
)
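// regStore is the subset of the SA gRPC client used by the mailer: fetching
// registrations so their contact addresses can be read.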
type regStore interface {
GetRegistration(ctx context.Context, req *sapb.RegistrationID, _ ...grpc.CallOption) (*corepb.Registration, error)
}
// limiter tracks how many mails we've sent to a given address in a given day.
// Note that this does not track mails across restarts of the process.
// Modifications to `counts` and `currentDay` are protected by a mutex.
type limiter struct {
sync.RWMutex
// currentDay is a day in UTC, truncated to 24 hours. When the current
// time is more than 24 hours past this date, all counts reset and this
// date is updated.
currentDay time.Time
// counts is a map from address to number of mails we have attempted to
// send during `currentDay`.
counts map[string]int
// limit is the number of sends after which we'll return an error from
// check()
limit int
clk clock.Clock
}
const oneDay = 24 * time.Hour
// maybeBumpDay resets the counts map and updates lim.currentDay if the
// current value is at least 24 hours old (this also initializes both fields
// on first use). The caller must hold the limiter's write lock.
func (lim *limiter) maybeBumpDay() {
today := lim.clk.Now().Truncate(oneDay)
if (today.Sub(lim.currentDay) >= oneDay && len(lim.counts) > 0) ||
lim.counts == nil {
// Throw away counts so far and switch to a new day.
// This also does the initialization of counts and currentDay the first
// time inc() is called.
lim.counts = make(map[string]int)
lim.currentDay = today
}
}
// inc increments the count for the current day, and cleans up previous days
// if needed.
func (lim *limiter) inc(address string) {
lim.Lock()
defer lim.Unlock()
lim.maybeBumpDay()
lim.counts[address] += 1
}
// check checks whether the count for the given address is at the limit,
// and returns an error if so.
func (lim *limiter) check(address string) error {
// maybeBumpDay may modify counts and currentDay, so take the write lock
// here rather than the read lock.
lim.Lock()
defer lim.Unlock()
lim.maybeBumpDay()
if lim.counts[address] >= lim.limit {
return fmt.Errorf("daily mail limit exceeded for %q", address)
}
return nil
}
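// mailer holds the dependencies and configuration for a run of the
// expiration mailer: database and SA handles, the SMTP mailer, templates,
// the nag schedule, limits, and metrics.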
type mailer struct {
log blog.Logger
dbMap *db.WrappedMap
rs regStore
mailer bmail.Mailer
emailTemplate *template.Template
subjectTemplate *template.Template
nagTimes []time.Duration
parallelSends uint
certificatesPerTick int
// addressLimiter limits how many mails we'll send to a single address in
// a single day.
addressLimiter *limiter
// Maximum number of rows to update in a single SQL UPDATE statement.
updateChunkSize int
clk clock.Clock
stats mailerStats
}
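// certDERWithRegID pairs a certificate's DER bytes with the ID of the
// registration (account) that requested it.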
type certDERWithRegID struct {
DER core.CertDER
RegID int64
}
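// mailerStats bundles the Prometheus metrics exported by the expiration
// mailer.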
type mailerStats struct {
sendDelay *prometheus.GaugeVec
sendDelayHistogram *prometheus.HistogramVec
nagsAtCapacity *prometheus.GaugeVec
errorCount *prometheus.CounterVec
sendLatency prometheus.Histogram
processingLatency prometheus.Histogram
certificatesExamined prometheus.Counter
certificatesAlreadyRenewed prometheus.Counter
certificatesPerAccountNeedingMail prometheus.Histogram
}
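// sendNags sends a single expiration notice to the given contacts covering
// the given certificates. Contacts that are not mailto: URLs, or that have
// hit the per-address daily limit, are skipped. The subject and body
// templates are rendered with the soonest expiration date and the (possibly
// truncated) list of affected names, and the message is sent over the
// provided SMTP connection.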
func (m *mailer) sendNags(conn bmail.Conn, contacts []string, certs []*x509.Certificate) error {
if len(certs) == 0 {
return errors.New("no certs given to send nags for")
}
emails := []string{}
for _, contact := range contacts {
parsed, err := url.Parse(contact)
if err != nil {
m.log.Errf("parsing contact email %s: %s", contact, err)
continue
}
if parsed.Scheme != "mailto" {
continue
}
address := parsed.Opaque
err = m.addressLimiter.check(address)
if err != nil {
m.log.Infof("not sending mail: %s", err)
continue
}
m.addressLimiter.inc(address)
emails = append(emails, parsed.Opaque)
}
if len(emails) == 0 {
return nil
}
expiresIn := time.Duration(math.MaxInt64)
expDate := m.clk.Now()
domains := []string{}
serials := []string{}
// Pick out the expiration date that is closest to being hit.
for _, cert := range certs {
domains = append(domains, cert.DNSNames...)
serials = append(serials, core.SerialToString(cert.SerialNumber))
possible := cert.NotAfter.Sub(m.clk.Now())
if possible < expiresIn {
expiresIn = possible
expDate = cert.NotAfter
}
}
domains = core.UniqueLowerNames(domains)
sort.Strings(domains)
const maxSerials = 100
truncatedSerials := serials
if len(truncatedSerials) > maxSerials {
truncatedSerials = serials[0:maxSerials]
}
const maxDomains = 100
truncatedDomains := domains
if len(truncatedDomains) > maxDomains {
truncatedDomains = domains[0:maxDomains]
}
// Construct the information about the expiring certificates for use in the
// subject template
expiringSubject := fmt.Sprintf("%q", domains[0])
if len(domains) > 1 {
expiringSubject += fmt.Sprintf(" (and %d more)", len(domains)-1)
}
// Execute the subjectTemplate by filling in the ExpirationSubject
subjBuf := new(bytes.Buffer)
err := m.subjectTemplate.Execute(subjBuf, struct {
ExpirationSubject string
}{
ExpirationSubject: expiringSubject,
})
if err != nil {
m.stats.errorCount.With(prometheus.Labels{"type": "SubjectTemplateFailure"}).Inc()
return err
}
email := struct {
ExpirationDate string
DaysToExpiration int
DNSNames string
TruncatedDNSNames string
NumDNSNamesOmitted int
}{
ExpirationDate: expDate.UTC().Format(time.DateOnly),
DaysToExpiration: int(expiresIn.Hours() / 24),
DNSNames: strings.Join(domains, "\n"),
TruncatedDNSNames: strings.Join(truncatedDomains, "\n"),
NumDNSNamesOmitted: len(domains) - len(truncatedDomains),
}
msgBuf := new(bytes.Buffer)
err = m.emailTemplate.Execute(msgBuf, email)
if err != nil {
m.stats.errorCount.With(prometheus.Labels{"type": "TemplateFailure"}).Inc()
return err
}
logItem := struct {
Rcpt []string
DaysToExpiration int
TruncatedDNSNames []string
TruncatedSerials []string
}{
Rcpt: emails,
DaysToExpiration: email.DaysToExpiration,
TruncatedDNSNames: truncatedDomains,
TruncatedSerials: truncatedSerials,
}
logStr, err := json.Marshal(logItem)
if err != nil {
m.log.Errf("logItem could not be serialized to JSON. Raw: %+v", logItem)
return err
}
m.log.Infof("attempting send JSON=%s", string(logStr))
startSending := m.clk.Now()
err = conn.SendMail(emails, subjBuf.String(), msgBuf.String())
if err != nil {
m.log.Errf("failed send JSON=%s err=%s", string(logStr), err)
return err
}
finishSending := m.clk.Now()
elapsed := finishSending.Sub(startSending)
m.stats.sendLatency.Observe(elapsed.Seconds())
return nil
}
// updateLastNagTimestamps updates the lastExpirationNagSent column for every cert in
// the given list. Even though it can encounter errors, it only logs them and
// does not return them, because we always prefer to simply continue.
func (m *mailer) updateLastNagTimestamps(ctx context.Context, certs []*x509.Certificate) {
for len(certs) > 0 {
size := len(certs)
if m.updateChunkSize > 0 && size > m.updateChunkSize {
size = m.updateChunkSize
}
chunk := certs[0:size]
certs = certs[size:]
m.updateLastNagTimestampsChunk(ctx, chunk)
}
}
// updateLastNagTimestampsChunk processes a single chunk (up to 65k) of certificates.
func (m *mailer) updateLastNagTimestampsChunk(ctx context.Context, certs []*x509.Certificate) {
params := make([]interface{}, len(certs)+1)
for i, cert := range certs {
params[i+1] = core.SerialToString(cert.SerialNumber)
}
query := fmt.Sprintf(
"UPDATE certificateStatus SET lastExpirationNagSent = ? WHERE serial IN (%s)",
db.QuestionMarks(len(certs)),
)
params[0] = m.clk.Now()
_, err := m.dbMap.WithContext(ctx).Exec(query, params...)
if err != nil {
m.log.AuditErrf("Error updating certificate status for %d certs: %s", len(certs), err)
m.stats.errorCount.With(prometheus.Labels{"type": "UpdateCertificateStatus"}).Inc()
}
}
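// certIsRenewed reports whether a newer certificate covering exactly the same
// set of names exists, by checking fqdnSets for a row with the same name hash
// issued after the given issuance time.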
func (m *mailer) certIsRenewed(ctx context.Context, names []string, issued time.Time) (bool, error) {
namehash := sa.HashNames(names)
var present bool
err := m.dbMap.WithContext(ctx).SelectOne(
&present,
`SELECT EXISTS (SELECT id FROM fqdnSets WHERE setHash = ? AND issued > ? LIMIT 1)`,
namehash,
issued,
)
return present, err
}
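// work is the unit handed to each sender goroutine: all of the expiring
// certificate DERs belonging to a single registration ID.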
type work struct {
regID int64
certDERs []core.CertDER
}
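// processCerts groups the expiring certificates by registration ID and fans
// the groups out to up to parallelSends goroutines, each with its own SMTP
// connection.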
func (m *mailer) processCerts(
ctx context.Context,
allCerts []certDERWithRegID,
expiresIn time.Duration,
) error {
regIDToCertDERs := make(map[int64][]core.CertDER)
for _, cert := range allCerts {
cs := regIDToCertDERs[cert.RegID]
cs = append(cs, cert.DER)
regIDToCertDERs[cert.RegID] = cs
}
parallelSends := m.parallelSends
if parallelSends == 0 {
parallelSends = 1
}
var wg sync.WaitGroup
workChan := make(chan work, len(regIDToCertDERs))
// Populate the work chan on a goroutine so work is available as soon
// as one of the sender routines starts.
go func(ch chan<- work) {
for regID, certs := range regIDToCertDERs {
ch <- work{regID, certs}
}
close(workChan)
}(workChan)
for senderNum := uint(0); senderNum < parallelSends; senderNum++ {
// For politeness' sake, don't open more than 1 new connection per
// second.
if senderNum > 0 {
time.Sleep(time.Second)
}
if ctx.Err() != nil {
return ctx.Err()
}
conn, err := m.mailer.Connect()
if err != nil {
m.log.AuditErrf("connecting parallel sender %d: %s", senderNum, err)
return err
}
wg.Add(1)
go func(conn bmail.Conn, ch <-chan work) {
defer wg.Done()
for w := range ch {
err := m.sendToOneRegID(ctx, conn, w.regID, w.certDERs, expiresIn)
if err != nil {
m.log.AuditErr(err.Error())
}
}
conn.Close()
}(conn, workChan)
}
wg.Wait()
return nil
}
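// sendToOneRegID looks up the registration for regID, parses its expiring
// certificates, drops any that have already been renewed, sends a single nag
// email for the rest, and records lastExpirationNagSent so they are not
// examined again for this nag interval.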
func (m *mailer) sendToOneRegID(ctx context.Context, conn bmail.Conn, regID int64, certDERs []core.CertDER, expiresIn time.Duration) error {
if ctx.Err() != nil {
return ctx.Err()
}
if len(certDERs) == 0 {
return errors.New("shouldn't happen: empty certificate list in sendToOneRegID")
}
reg, err := m.rs.GetRegistration(ctx, &sapb.RegistrationID{Id: regID})
if err != nil {
m.stats.errorCount.With(prometheus.Labels{"type": "GetRegistration"}).Inc()
return fmt.Errorf("Error fetching registration %d: %s", regID, err)
}
parsedCerts := []*x509.Certificate{}
for i, certDER := range certDERs {
if ctx.Err() != nil {
return ctx.Err()
}
parsedCert, err := x509.ParseCertificate(certDER)
if err != nil {
// TODO(#1420): tell registration about this error
m.log.AuditErrf("Error parsing certificate: %s. Body: %x", err, certDER)
m.stats.errorCount.With(prometheus.Labels{"type": "ParseCertificate"}).Inc()
continue
}
// The histogram version of send delay reports the worst case send delay for
// a single regID in this cycle.
if i == 0 {
sendDelay := expiresIn - parsedCert.NotAfter.Sub(m.clk.Now())
m.stats.sendDelayHistogram.With(prometheus.Labels{"nag_group": expiresIn.String()}).Observe(
sendDelay.Truncate(time.Second).Seconds())
}
renewed, err := m.certIsRenewed(ctx, parsedCert.DNSNames, parsedCert.NotBefore)
if err != nil {
m.log.AuditErrf("expiration-mailer: error fetching renewal state: %v", err)
// assume not renewed
} else if renewed {
m.log.Debugf("Cert %s is already renewed", core.SerialToString(parsedCert.SerialNumber))
m.stats.certificatesAlreadyRenewed.Add(1)
m.updateLastNagTimestamps(ctx, []*x509.Certificate{parsedCert})
continue
}
parsedCerts = append(parsedCerts, parsedCert)
}
m.stats.certificatesPerAccountNeedingMail.Observe(float64(len(parsedCerts)))
if len(parsedCerts) == 0 {
// all certificates are renewed
return nil
}
err = m.sendNags(conn, reg.Contact, parsedCerts)
if err != nil {
// Check to see if the error was due to the mail being undeliverable,
// in which case we don't want to try again later.
var badAddrErr *bmail.BadAddressSMTPError
if ok := errors.As(err, &badAddrErr); ok {
m.updateLastNagTimestamps(ctx, parsedCerts)
}
m.stats.errorCount.With(prometheus.Labels{"type": "SendNags"}).Inc()
return fmt.Errorf("sending nag emails: %s", err)
}
m.updateLastNagTimestamps(ctx, parsedCerts)
return nil
}
// findExpiringCertificates finds certificates that might need an expiration mail, filters them,
// groups by account, sends mail, and updates their status in the DB so we don't examine them again.
//
// Invariant: findExpiringCertificates should examine each certificate at most N times, where
// N is the number of reminders. For every certificate examined (barring errors), this function
// should update the lastExpirationNagSent field of certificateStatus, so it does not need to
// examine the same certificate again on the next go-round. This ensures we make forward progress
// and don't clog up the window of certificates to be examined.
func (m *mailer) findExpiringCertificates(ctx context.Context) error {
now := m.clk.Now()
// E.g. m.nagTimes = [2, 4, 8, 15] days from expiration
for i, expiresIn := range m.nagTimes {
left := now
if i > 0 {
left = left.Add(m.nagTimes[i-1])
}
right := now.Add(expiresIn)
m.log.Infof("expiration-mailer: Searching for certificates that expire between %s and %s and had last nag >%s before expiry",
left.UTC(), right.UTC(), expiresIn)
var certs []certDERWithRegID
var err error
if features.Enabled(features.ExpirationMailerUsesJoin) {
certs, err = m.getCertsWithJoin(ctx, left, right, expiresIn)
} else {
certs, err = m.getCerts(ctx, left, right, expiresIn)
}
if err != nil {
return err
}
m.stats.certificatesExamined.Add(float64(len(certs)))
// If exactly `m.certificatesPerTick` rows were returned, set a gauge
// indicating that this nag group is at capacity. If this condition
// continually occurs across mailer runs then we will not catch up,
// resulting in under-sending expiration mails. The effects of this
// were initially described in issue #2002[0].
//
// 0: https://github.com/letsencrypt/boulder/issues/2002
atCapacity := float64(0)
if len(certs) == m.certificatesPerTick {
m.log.Infof("nag group %s expiring certificates at configured capacity (select limit %d)",
expiresIn.String(), m.certificatesPerTick)
atCapacity = float64(1)
}
m.stats.nagsAtCapacity.With(prometheus.Labels{"nag_group": expiresIn.String()}).Set(atCapacity)
m.log.Infof("Found %d certificates expiring between %s and %s", len(certs),
left.Format(time.DateTime), right.Format(time.DateTime))
if len(certs) == 0 {
continue // nothing to do
}
processingStarted := m.clk.Now()
err = m.processCerts(ctx, certs, expiresIn)
if err != nil {
m.log.AuditErr(err.Error())
}
processingEnded := m.clk.Now()
elapsed := processingEnded.Sub(processingStarted)
m.stats.processingLatency.Observe(elapsed.Seconds())
}
return nil
}
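// getCertsWithJoin is the single-query variant used when the
// ExpirationMailerUsesJoin feature flag is enabled; see getCerts for the
// two-step version.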
func (m *mailer) getCertsWithJoin(ctx context.Context, left, right time.Time, expiresIn time.Duration) ([]certDERWithRegID, error) {
// Unlike getCerts, this variant does a single query that JOINs
// certificateStatus with certificates, so the DER and registration ID for
// each certificate nearing expiry that meets our notification criteria come
// back directly, with no second round of per-serial lookups.
var certs []certDERWithRegID
_, err := m.dbMap.WithContext(ctx).Select(
&certs,
`SELECT
cert.der as der, cert.registrationID as regID
FROM certificateStatus AS cs
JOIN certificates as cert
ON cs.serial = cert.serial
AND cs.notAfter > :cutoffA
AND cs.notAfter <= :cutoffB
AND cs.status != "revoked"
AND COALESCE(TIMESTAMPDIFF(SECOND, cs.lastExpirationNagSent, cs.notAfter) > :nagCutoff, 1)
ORDER BY cs.notAfter ASC
LIMIT :certificatesPerTick`,
map[string]interface{}{
"cutoffA": left,
"cutoffB": right,
"nagCutoff": expiresIn.Seconds(),
"certificatesPerTick": m.certificatesPerTick,
},
)
if err != nil {
m.log.AuditErrf("expiration-mailer: Error loading certificate serials: %s", err)
return nil, err
}
m.log.Debugf("found %d certificates", len(certs))
return certs, nil
}
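// getCerts is the two-step variant used when the ExpirationMailerUsesJoin
// feature is disabled: it first selects matching serials from
// certificateStatus, then fetches each certificate row individually.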
func (m *mailer) getCerts(ctx context.Context, left, right time.Time, expiresIn time.Duration) ([]certDERWithRegID, error) {
// First we do a query on the certificateStatus table to find certificates
// nearing expiry meeting our criteria for email notification. We later
// sequentially fetch the certificate details. This avoids an expensive
// JOIN.
var serials []string
_, err := m.dbMap.WithContext(ctx).Select(
&serials,
`SELECT
cs.serial
FROM certificateStatus AS cs
WHERE cs.notAfter > :cutoffA
AND cs.notAfter <= :cutoffB
AND cs.status != "revoked"
AND COALESCE(TIMESTAMPDIFF(SECOND, cs.lastExpirationNagSent, cs.notAfter) > :nagCutoff, 1)
ORDER BY cs.notAfter ASC
LIMIT :certificatesPerTick`,
map[string]interface{}{
"cutoffA": left,
"cutoffB": right,
"nagCutoff": expiresIn.Seconds(),
"certificatesPerTick": m.certificatesPerTick,
},
)
if err != nil {
m.log.AuditErrf("expiration-mailer: Error loading certificate serials: %s", err)
return nil, err
}
m.log.Debugf("found %d certificates", len(serials))
// Now we can sequentially retrieve the certificate details for each of the
// certificate status rows
var certs []certDERWithRegID
for i, serial := range serials {
if ctx.Err() != nil {
return nil, ctx.Err()
}
var cert core.Certificate
cert, err := sa.SelectCertificate(m.dbMap.WithContext(ctx), serial)
if err != nil {
// We can get a NoRowsErr when processing a serial number corresponding
// to a precertificate with no final certificate. Since this certificate
// is not being used by a subscriber, we don't send expiration email about
// it.
if db.IsNoRows(err) {
m.log.Infof("no rows for serial %q", serial)
continue
}
m.log.AuditErrf("expiration-mailer: Error loading cert %q: %s", cert.Serial, err)
continue
}
certs = append(certs, certDERWithRegID{
DER: cert.DER,
RegID: cert.RegistrationID,
})
if i == 0 {
// Report the send delay metric. Note: this is the worst-case send delay
// of any certificate in this batch, because rows are ordered by notAfter
// ascending, so the first serial is the soonest to expire.
sendDelay := expiresIn - cert.Expires.Sub(m.clk.Now())
m.stats.sendDelay.With(prometheus.Labels{"nag_group": expiresIn.String()}).Set(
sendDelay.Truncate(time.Second).Seconds())
}
}
return certs, nil
}
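// durationSlice implements sort.Interface so the configured nag times can be
// sorted into increasing order.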
type durationSlice []time.Duration
func (ds durationSlice) Len() int {
return len(ds)
}
func (ds durationSlice) Less(a, b int) bool {
return ds[a] < ds[b]
}
func (ds durationSlice) Swap(a, b int) {
ds[a], ds[b] = ds[b], ds[a]
}
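// Config is the JSON configuration for the expiration-mailer command.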
type Config struct {
Mailer struct {
cmd.ServiceConfig
DB cmd.DBConfig
cmd.SMTPConfig
// From is the "From" address for reminder messages.
From string
// Subject is the Subject line of reminder messages.
// This is a Go template with a single variable: ExpirationSubject,
// which contains a list of affected hostnames, possibly truncated.
Subject string
// CertLimit is the maximum number of certificates to investigate in a
// single batch.
CertLimit int
// MailsPerAddressPerDay is the maximum number of emails we'll send to
// a single address in a single day. Defaults to 0 (unlimited).
// Note that this does not track sends across restarts of the process,
// so we may send more than this when we restart expiration-mailer.
// This is a best-effort limitation.
MailsPerAddressPerDay int
// UpdateChunkSize is the maximum number of rows to update in a single
// SQL UPDATE statement.
UpdateChunkSize int
NagTimes []string
// TODO(#6097): Remove this
NagCheckInterval string
// Path to a text/template email template
EmailTemplate string
// How often to process a batch of certificates
Frequency cmd.ConfigDuration
// How many parallel goroutines should process each batch of emails
ParallelSends uint
TLS cmd.TLSConfig
SAService *cmd.GRPCClientConfig
// Path to a file containing a list of trusted root certificates for use
// during the SMTP connection (as opposed to the gRPC connections).
SMTPTrustedRootFile string
Features map[string]bool
}
Syslog cmd.SyslogConfig
Beeline cmd.BeelineConfig
}
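// initStats creates, registers, and returns the Prometheus metrics used by
// the mailer.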
func initStats(stats prometheus.Registerer) mailerStats {
sendDelay := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "send_delay",
Help: "For the last batch of certificates, difference between the idealized send time and actual send time. Will always be nonzero, bigger numbers are worse",
},
[]string{"nag_group"})
stats.MustRegister(sendDelay)
sendDelayHistogram := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "send_delay_histogram",
Help: "For each mail sent, difference between the idealized send time and actual send time. Will always be nonzero, bigger numbers are worse",
Buckets: prometheus.LinearBuckets(86400, 86400, 10),
},
[]string{"nag_group"})
stats.MustRegister(sendDelayHistogram)
nagsAtCapacity := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "nags_at_capacity",
Help: "Count of nag groups at capcacity",
},
[]string{"nag_group"})
stats.MustRegister(nagsAtCapacity)
errorCount := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "errors",
Help: "Number of errors",
},
[]string{"type"})
stats.MustRegister(errorCount)
sendLatency := prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "send_latency",
Help: "Time the mailer takes sending messages in seconds",
Buckets: metrics.InternetFacingBuckets,
})
stats.MustRegister(sendLatency)
processingLatency := prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "processing_latency",
Help: "Time the mailer takes processing certificates in seconds",
Buckets: []float64{30, 60, 75, 90, 120, 600, 3600},
})
stats.MustRegister(processingLatency)
certificatesExamined := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "certificates_examined",
Help: "Number of certificates looked at that are potentially due for an expiration mail",
})
stats.MustRegister(certificatesExamined)
certificatesAlreadyRenewed := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "certificates_already_renewed",
Help: "Number of certificates from certificates_examined that were ignored because they were already renewed",
})
stats.MustRegister(certificatesAlreadyRenewed)
accountsNeedingMail := prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "certificates_per_account_needing_mail",
Help: "After ignoring certificates_already_renewed and grouping the remaining certificates by account, how many accounts needed to get an email; grouped by how many certificates each account needed",
Buckets: []float64{0, 1, 2, 100, 1000, 10000, 100000},
})
stats.MustRegister(accountsNeedingMail)
return mailerStats{
sendDelay: sendDelay,
sendDelayHistogram: sendDelayHistogram,
nagsAtCapacity: nagsAtCapacity,
errorCount: errorCount,
sendLatency: sendLatency,
processingLatency: processingLatency,
certificatesExamined: certificatesExamined,
certificatesAlreadyRenewed: certificatesAlreadyRenewed,
certificatesPerAccountNeedingMail: accountsNeedingMail,
}
}
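// main parses flags and the JSON config, wires up the database, SA gRPC
// client, and SMTP mailer, then runs findExpiringCertificates once, or
// repeatedly on a ticker when -daemon is set.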
func main() {
configFile := flag.String("config", "", "File path to the configuration file for this service")
certLimit := flag.Int("cert_limit", 0, "Count of certificates to process per expiration period")
reconnBase := flag.Duration("reconnectBase", 1*time.Second, "Base sleep duration between reconnect attempts")
reconnMax := flag.Duration("reconnectMax", 5*60*time.Second, "Max sleep duration between reconnect attempts after exponential backoff")
daemon := flag.Bool("daemon", false, "Run in daemon mode")
flag.Parse()
if *configFile == "" {
flag.Usage()
os.Exit(1)
}
var c Config
err := cmd.ReadConfigFile(*configFile, &c)
cmd.FailOnError(err, "Reading JSON config file into config structure")
err = features.Set(c.Mailer.Features)
cmd.FailOnError(err, "Failed to set feature flags")
bc, err := c.Beeline.Load()
cmd.FailOnError(err, "Failed to load Beeline config")
beeline.Init(bc)
defer beeline.Close()
scope, logger := cmd.StatsAndLogging(c.Syslog, c.Mailer.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
if *certLimit > 0 {
c.Mailer.CertLimit = *certLimit
}
// Default to 100 if no certLimit is set
if c.Mailer.CertLimit == 0 {
c.Mailer.CertLimit = 100
}
if c.Mailer.MailsPerAddressPerDay == 0 {
c.Mailer.MailsPerAddressPerDay = math.MaxInt
}
dbMap, err := sa.InitWrappedDb(c.Mailer.DB, scope, logger)
cmd.FailOnError(err, "While initializing dbMap")
tlsConfig, err := c.Mailer.TLS.Load()
cmd.FailOnError(err, "TLS config")
clk := cmd.Clock()
conn, err := bgrpc.ClientSetup(c.Mailer.SAService, tlsConfig, scope, clk)
cmd.FailOnError(err, "Failed to load credentials and create gRPC connection to SA")
sac := sapb.NewStorageAuthorityClient(conn)
var smtpRoots *x509.CertPool
if c.Mailer.SMTPTrustedRootFile != "" {
pem, err := os.ReadFile(c.Mailer.SMTPTrustedRootFile)
cmd.FailOnError(err, "Loading trusted roots file")
smtpRoots = x509.NewCertPool()
if !smtpRoots.AppendCertsFromPEM(pem) {
cmd.Fail("Failed to parse root certs PEM")
}
}
// Load email template
emailTmpl, err := os.ReadFile(c.Mailer.EmailTemplate)
cmd.FailOnError(err, fmt.Sprintf("Could not read email template file [%s]", c.Mailer.EmailTemplate))
tmpl, err := template.New("expiry-email").Parse(string(emailTmpl))
cmd.FailOnError(err, "Could not parse email template")
// If there is no configured subject template, use a default
if c.Mailer.Subject == "" {
c.Mailer.Subject = defaultExpirationSubject
}
// Load subject template
subjTmpl, err := template.New("expiry-email-subject").Parse(c.Mailer.Subject)
cmd.FailOnError(err, "Could not parse email subject template")
fromAddress, err := netmail.ParseAddress(c.Mailer.From)
cmd.FailOnError(err, fmt.Sprintf("Could not parse from address: %s", c.Mailer.From))
smtpPassword, err := c.Mailer.PasswordConfig.Pass()
cmd.FailOnError(err, "Failed to load SMTP password")
mailClient := bmail.New(
c.Mailer.Server,
c.Mailer.Port,
c.Mailer.Username,
smtpPassword,
smtpRoots,
*fromAddress,
logger,
scope,
*reconnBase,
*reconnMax)
var nags durationSlice
for _, nagDuration := range c.Mailer.NagTimes {
dur, err := time.ParseDuration(nagDuration)
if err != nil {
logger.AuditErrf("Failed to parse nag duration string [%s]: %s", nagDuration, err)
return
}
// Add some padding to the nag times so we send _before_ the configured
// time rather than after. See https://github.com/letsencrypt/boulder/pull/1029
adjustedInterval := dur + c.Mailer.Frequency.Duration
nags = append(nags, adjustedInterval)
}
// Make sure durations are sorted in increasing order
sort.Sort(nags)
if c.Mailer.UpdateChunkSize > 65535 {
// MariaDB limits the number of placeholder parameters to max_uint16:
// https://github.com/MariaDB/server/blob/10.5/sql/sql_prepare.cc#L2629-L2635
cmd.Fail(fmt.Sprintf("UpdateChunkSize of %d is too big", c.Mailer.UpdateChunkSize))
}
m := mailer{
log: logger,
dbMap: dbMap,
rs: sac,
mailer: mailClient,
subjectTemplate: subjTmpl,
emailTemplate: tmpl,
nagTimes: nags,
certificatesPerTick: c.Mailer.CertLimit,
addressLimiter: &limiter{clk: cmd.Clock(), limit: c.Mailer.MailsPerAddressPerDay},
updateChunkSize: c.Mailer.UpdateChunkSize,
parallelSends: c.Mailer.ParallelSends,
clk: clk,
stats: initStats(scope),
}
// Prefill this labelled stat with the possible label values, so each value is
// set to 0 on startup, rather than being missing from stats collection until
// the first mail run.
for _, expiresIn := range nags {
m.stats.nagsAtCapacity.With(prometheus.Labels{"nag_group": expiresIn.String()}).Set(0)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go cmd.CatchSignals(logger, func() {
cancel()
select {} // wait for the `findExpiringCertificates` calls below to exit
})
if *daemon {
if c.Mailer.Frequency.Duration == 0 {
fmt.Fprintln(os.Stderr, "mailer.Frequency is not set in the JSON config")
os.Exit(1)
}
t := time.NewTicker(c.Mailer.Frequency.Duration)
for {
select {
case <-t.C:
err = m.findExpiringCertificates(ctx)
cmd.FailOnError(err, "expiration-mailer has failed")
case <-ctx.Done():
os.Exit(0)
}
}
} else {
err = m.findExpiringCertificates(ctx)
cmd.FailOnError(err, "expiration-mailer has failed")
}
}
func init() {
cmd.RegisterCommand("expiration-mailer", main)
}