components-contrib/internal/component/sql/cleanup.go

255 lines
6.5 KiB
Go

/*
Copyright 2023 The Dapr Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sql
import (
"context"
"database/sql"
"errors"
"fmt"
"io"
"sync"
"sync/atomic"
"time"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgconn"
"github.com/dapr/kit/logger"
)
type GarbageCollector interface {
CleanupExpired() error
io.Closer
}
type GCOptions struct {
Logger logger.Logger
// Query that must atomically update the "last cleanup time" in the metadata table, but only if the garbage collector hasn't run already.
// The caller will check the nuber of affected rows. If zero, it assumes that the GC has ran too recently, and will not proceed to delete expired records.
// The query receives one parameter that is the last cleanup interval, in milliseconds.
UpdateLastCleanupQuery string
// Query that performs the cleanup of all expired rows.
DeleteExpiredValuesQuery string
// Interval to perfm the cleanup.
CleanupInterval time.Duration
// Database connection when using pgx.
DBPgx PgxConn
// Database connection when using database/sql.
DBSql DatabaseSQLConn
}
// Interface for connections that use pgx.
type PgxConn interface {
Begin(context.Context) (pgx.Tx, error)
Exec(context.Context, string, ...any) (pgconn.CommandTag, error)
}
// Interface for connections that use database/sql.
type DatabaseSQLConn interface {
BeginTx(context.Context, *sql.TxOptions) (*sql.Tx, error)
ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
}
type gc struct {
log logger.Logger
updateLastCleanupQuery string
deleteExpiredValuesQuery string
cleanupInterval time.Duration
dbPgx PgxConn
dbSQL DatabaseSQLConn
closed atomic.Bool
closedCh chan struct{}
wg sync.WaitGroup
}
func ScheduleGarbageCollector(opts GCOptions) (GarbageCollector, error) {
if opts.CleanupInterval <= 0 {
return new(gcNoOp), nil
}
if opts.DBPgx == nil && opts.DBSql == nil {
return nil, errors.New("either DBPgx or DBSql must be provided")
}
if opts.DBPgx != nil && opts.DBSql != nil {
return nil, errors.New("only one of DBPgx or DBSql must be provided")
}
gc := &gc{
log: opts.Logger,
updateLastCleanupQuery: opts.UpdateLastCleanupQuery,
deleteExpiredValuesQuery: opts.DeleteExpiredValuesQuery,
cleanupInterval: opts.CleanupInterval,
dbPgx: opts.DBPgx,
dbSQL: opts.DBSql,
closedCh: make(chan struct{}),
}
gc.wg.Add(1)
go func() {
defer gc.wg.Done()
gc.scheduleCleanup()
}()
return gc, nil
}
func (g *gc) scheduleCleanup() {
g.log.Infof("Schedule expired data clean up every %v", g.cleanupInterval)
ticker := time.NewTicker(g.cleanupInterval)
defer ticker.Stop()
var err error
for {
select {
case <-ticker.C:
err = g.CleanupExpired()
if err != nil {
g.log.Errorf("Error removing expired data: %v", err)
}
case <-g.closedCh:
g.log.Debug("Stopping background cleanup of expired data")
return
}
}
}
// Exposed for testing.
func (g *gc) CleanupExpired() error {
// Deletion can take a long time to complete so we have a long background context. Still catch closing of the GC.
ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10)
defer cancel()
g.wg.Add(1)
go func() {
// Wait for context cancellation or closing
select {
case <-ctx.Done():
case <-g.closedCh:
}
cancel()
g.wg.Done()
}()
// Check if the last iteration was too recent
// This performs an atomic operation, so allows coordination with other daprd processes too
// We do this before beginning the transaction
canContinue, err := g.updateLastCleanup(ctx)
if err != nil {
return fmt.Errorf("failed to read last cleanup time from database: %w", err)
}
if !canContinue {
g.log.Debug("Last cleanup was performed too recently")
return nil
}
var (
tx pgx.Tx
txwc *sql.Tx
)
if g.dbPgx != nil {
tx, err = g.dbPgx.Begin(ctx)
if err != nil {
return fmt.Errorf("failed to start transaction: %w", err)
}
defer tx.Rollback(ctx)
} else {
txwc, err = g.dbSQL.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("failed to start transaction: %w", err)
}
defer txwc.Rollback()
}
var rowsAffected int64
if tx != nil {
var res pgconn.CommandTag
res, err = tx.Exec(ctx, g.deleteExpiredValuesQuery)
if err != nil {
return fmt.Errorf("failed to execute query: %w", err)
}
rowsAffected = res.RowsAffected()
} else {
var res sql.Result
res, err = txwc.ExecContext(ctx, g.deleteExpiredValuesQuery)
if err != nil {
return fmt.Errorf("failed to execute query: %w", err)
}
rowsAffected, err = res.RowsAffected()
if err != nil {
return fmt.Errorf("failed to get rows affected: %w", err)
}
}
// Commit
if tx != nil {
err = tx.Commit(ctx)
} else {
err = txwc.Commit()
}
if err != nil {
return fmt.Errorf("failed to commit transaction: %w", err)
}
g.log.Infof("Removed %d expired rows", rowsAffected)
return nil
}
// updateLastCleanup sets the 'last-cleanup' value only if it's less than cleanupInterval.
// Returns true if the row was updated, which means that the cleanup can proceed.
func (g *gc) updateLastCleanup(ctx context.Context) (bool, error) {
var n int64
// Subtract 100ms for some buffer
if g.dbPgx != nil {
res, err := g.dbPgx.Exec(ctx, g.updateLastCleanupQuery, g.cleanupInterval.Milliseconds()-100)
if err != nil {
return false, fmt.Errorf("error updating last cleanup time: %w", err)
}
n = res.RowsAffected()
} else {
res, err := g.dbSQL.ExecContext(ctx, g.updateLastCleanupQuery, g.cleanupInterval.Milliseconds()-100)
if err != nil {
return false, fmt.Errorf("error updating last cleanup time: %w", err)
}
n, err = res.RowsAffected()
if err != nil {
return false, fmt.Errorf("failed to retrieve affected row count: %w", err)
}
}
return n > 0, nil
}
func (g *gc) Close() error {
defer g.wg.Wait()
if g.closed.CompareAndSwap(false, true) {
close(g.closedCh)
}
return nil
}
type gcNoOp struct{}
func (g *gcNoOp) CleanupExpired() error { return nil }
func (g *gcNoOp) Close() error { return nil }