integration: add test for boulder-janitor. (#4364)
This commit is contained in:
parent
98677b83d8
commit
bb005e1c79
|
@ -24,6 +24,7 @@ func newCertificatesJob(
|
|||
db: db,
|
||||
log: log,
|
||||
purgeBefore: purgeBefore,
|
||||
workSleep: config.Janitor.Certificates.WorkSleep.Duration,
|
||||
batchSize: config.Janitor.Certificates.BatchSize,
|
||||
maxDPS: config.Janitor.Certificates.MaxDPS,
|
||||
parallelism: config.Janitor.Certificates.Parallelism,
|
||||
|
|
|
@ -24,6 +24,7 @@ func newCertificatesPerNameJob(
|
|||
db: db,
|
||||
log: log,
|
||||
purgeBefore: purgeBefore,
|
||||
workSleep: config.Janitor.CertificatesPerName.WorkSleep.Duration,
|
||||
batchSize: config.Janitor.CertificatesPerName.BatchSize,
|
||||
maxDPS: config.Janitor.CertificatesPerName.MaxDPS,
|
||||
parallelism: config.Janitor.CertificatesPerName.Parallelism,
|
||||
|
|
|
@ -24,6 +24,7 @@ func newCertificateStatusJob(
|
|||
db: db,
|
||||
log: log,
|
||||
purgeBefore: purgeBefore,
|
||||
workSleep: config.Janitor.CertificateStatus.WorkSleep.Duration,
|
||||
batchSize: config.Janitor.CertificateStatus.BatchSize,
|
||||
maxDPS: config.Janitor.CertificateStatus.MaxDPS,
|
||||
parallelism: config.Janitor.CertificateStatus.Parallelism,
|
||||
|
|
|
@ -13,6 +13,9 @@ type CleanupConfig struct {
|
|||
Enabled bool
|
||||
// GracePeriod controls when a resource is old enough to be cleaned up.
|
||||
GracePeriod cmd.ConfigDuration
|
||||
// WorkSleep controls how long the janitor's work threads sleep between
|
||||
// finding no work and trying again. Defaults to a minute if not provided.
|
||||
WorkSleep cmd.ConfigDuration
|
||||
// BatchSize controls how many rows of the resource will be read from the DB
|
||||
// per-query.
|
||||
BatchSize int64
|
||||
|
|
|
@ -47,7 +47,9 @@ func New(clk clock.Clock, config Config) (*janitor, error) {
|
|||
|
||||
// Setup logging and stats
|
||||
scope, logger := cmd.StatsAndLogging(config.Janitor.Syslog, config.Janitor.DebugAddr)
|
||||
scope.MustRegister(errStat)
|
||||
scope.MustRegister(deletedStat)
|
||||
scope.MustRegister(workStat)
|
||||
defer logger.AuditPanic()
|
||||
logger.Info(cmd.VersionString())
|
||||
|
||||
|
|
|
@ -52,6 +52,9 @@ type batchedDBJob struct {
|
|||
// purgeBefore indicates the cut-off for the the resoruce being cleaned up by
|
||||
// the job. Rows that older than now - purgeBefore are deleted.
|
||||
purgeBefore time.Time
|
||||
// workSleep is a duration that the job will sleep between getWork() calls
|
||||
// when no new work is found. If not provided, defaults to a minute.
|
||||
workSleep time.Duration
|
||||
// batchSize indicates how many database rows of work should be returned per query.
|
||||
batchSize int64
|
||||
// maxDPS optionally indicates a maximum rate of deletes to run per second.
|
||||
|
@ -171,7 +174,11 @@ func (j batchedDBJob) RunForever() {
|
|||
j.log.Debugf(
|
||||
"made no new progress on table %q. Sleeping for a minute",
|
||||
j.table)
|
||||
time.Sleep(time.Minute)
|
||||
if j.workSleep.Seconds() == 0 {
|
||||
time.Sleep(time.Minute)
|
||||
} else {
|
||||
time.Sleep(j.workSleep)
|
||||
}
|
||||
}
|
||||
id = lastID
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"janitor": {
|
||||
"syslog": {
|
||||
"stdoutLevel": 7
|
||||
"stdoutLevel": 6
|
||||
},
|
||||
"dbConnectFile": "test/secrets/janitor_dburl",
|
||||
"maxDBConns": 10,
|
||||
|
@ -9,23 +9,26 @@
|
|||
"certificates": {
|
||||
"enabled": true,
|
||||
"gracePeriod": "1h",
|
||||
"batchSize": 10,
|
||||
"batchSize": 100,
|
||||
"workSleep": "500ms",
|
||||
"parallelism": 2,
|
||||
"maxDPS": 1
|
||||
"maxDPS": 50
|
||||
},
|
||||
"certificateStatus": {
|
||||
"enabled": true,
|
||||
"gracePeriod": "1h",
|
||||
"batchSize": 10,
|
||||
"batchSize": 100,
|
||||
"workSleep": "500ms",
|
||||
"parallelism": 2,
|
||||
"maxDPS": 1
|
||||
"maxDPS": 50
|
||||
},
|
||||
"certificatesPerName": {
|
||||
"enabled": true,
|
||||
"gracePeriod": "1h",
|
||||
"batchSize": 10,
|
||||
"batchSize": 100,
|
||||
"workSleep": "500ms",
|
||||
"parallelism": 2,
|
||||
"maxDPS": 1
|
||||
"maxDPS": 50
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,8 +30,6 @@ from helpers import *
|
|||
|
||||
from acme import challenges
|
||||
|
||||
import requests
|
||||
|
||||
def run_client_tests():
|
||||
root = os.environ.get("CERTBOT_PATH")
|
||||
assert root is not None, (
|
||||
|
@ -83,6 +81,94 @@ def run_expired_authz_purger():
|
|||
expect(now, 0, "authz")
|
||||
expect(after_grace_period, 1, "authz")
|
||||
|
||||
def run_janitor():
|
||||
# Set the fake clock to a year in the future such that all of the database
|
||||
# rows created during the integration tests are older than the grace period.
|
||||
now = datetime.datetime.utcnow()
|
||||
target_time = now+datetime.timedelta(days=+365)
|
||||
|
||||
e = os.environ.copy()
|
||||
e.setdefault("GORACE", "halt_on_error=1")
|
||||
e.setdefault("FAKECLOCK", fakeclock(target_time))
|
||||
|
||||
# Note: Must use exec here so that killing this process kills the command.
|
||||
cmdline = "exec ./bin/boulder-janitor --config test/config-next/janitor.json"
|
||||
p = subprocess.Popen(cmdline, shell=True, env=e)
|
||||
|
||||
# Wait for the janitor to come up
|
||||
waitport(8014, "boulder-janitor", None)
|
||||
|
||||
def statline(statname, table):
|
||||
# NOTE: we omit the trailing "}}" to make this match general enough to
|
||||
# permit new labels in the future.
|
||||
return "janitor_{0}{{table=\"{1}\"".format(statname, table)
|
||||
|
||||
def get_stat_line(port, stat):
|
||||
url = "http://localhost:%d/metrics" % port
|
||||
response = requests.get(url)
|
||||
for l in response.content.split("\n"):
|
||||
if l.strip().startswith(stat):
|
||||
return l
|
||||
return None
|
||||
|
||||
def stat_value(line):
|
||||
parts = line.split(" ")
|
||||
if len(parts) != 2:
|
||||
raise Exception("stat line {0} was missing required parts".format(line))
|
||||
return parts[1]
|
||||
|
||||
# Wait for the janitor to report it isn't finding new work
|
||||
print("waiting for boulder-janitor work to complete...\n")
|
||||
workDone = False
|
||||
for i in range(10):
|
||||
certStatusWorkbatch = get_stat_line(8014, statline("workbatch", "certificateStatus"))
|
||||
certsWorkBatch = get_stat_line(8014, statline("workbatch", "certificates"))
|
||||
certsPerNameWorkBatch = get_stat_line(8014, statline("workbatch", "certificatesPerName"))
|
||||
|
||||
allReady = True
|
||||
for line in [certStatusWorkbatch, certsWorkBatch, certsPerNameWorkBatch]:
|
||||
if stat_value(line) != "0":
|
||||
allReady = False
|
||||
|
||||
if allReady is False:
|
||||
print("not done after check {0}. Sleeping".format(i))
|
||||
time.sleep(2)
|
||||
else:
|
||||
workDone = True
|
||||
break
|
||||
|
||||
if workDone is False:
|
||||
raise Exception("Timed out waiting for janitor to report all work completed\n")
|
||||
|
||||
# Check deletion stats are not empty/zero
|
||||
for i in range(10):
|
||||
certStatusDeletes = get_stat_line(8014, statline("deletions", "certificateStatus"))
|
||||
certsDeletes = get_stat_line(8014, statline("deletions", "certificates"))
|
||||
certsPerNameDeletes = get_stat_line(8014, statline("deletions", "certificatesPerName"))
|
||||
|
||||
if certStatusDeletes is None or certsDeletes is None or certsPerNameDeletes is None:
|
||||
print("delete stats not present after check {0}. Sleeping".format(i))
|
||||
time.sleep(2)
|
||||
continue
|
||||
|
||||
for l in [certStatusDeletes, certsDeletes, certsPerNameDeletes]:
|
||||
if stat_value(l) == "0":
|
||||
raise Exception("Expected a non-zero number of deletes to be performed. Found {0}".format(l))
|
||||
|
||||
# Check that all error stats are empty
|
||||
errorStats = [
|
||||
statline("errors", "certificateStatus"),
|
||||
statline("errors", "certificates"),
|
||||
statline("errors", "certificatesPerName"),
|
||||
]
|
||||
for eStat in errorStats:
|
||||
actual = get_stat_line(8014, eStat)
|
||||
if actual is not None:
|
||||
raise Exception("Expected to find no error stat lines but found {0}\n".format(eStat))
|
||||
|
||||
# Terminate the janitor
|
||||
p.terminate()
|
||||
|
||||
def test_single_ocsp():
|
||||
"""Run the single-ocsp command, which is used to generate OCSP responses for
|
||||
intermediate certificates on a manual basis. Then start up an
|
||||
|
@ -187,6 +273,12 @@ def main():
|
|||
check_balance()
|
||||
if not CONFIG_NEXT:
|
||||
run_expired_authz_purger()
|
||||
|
||||
# Run the boulder-janitor. This should happen after all other tests because
|
||||
# it runs with the fake clock set to the future and deletes rows that may
|
||||
# otherwise be referenced by tests.
|
||||
run_janitor()
|
||||
|
||||
# Run the load-generator last. run_loadtest will stop the
|
||||
# pebble-challtestsrv before running the load-generator and will not restart
|
||||
# it.
|
||||
|
|
Loading…
Reference in New Issue