storage/pkg/lockfile/lockfile_unix.go

429 lines
14 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//go:build linux || solaris || darwin || freebsd
// +build linux solaris darwin freebsd
package lockfile
import (
"bytes"
cryptorand "crypto/rand"
"encoding/binary"
"fmt"
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"
"github.com/containers/storage/pkg/system"
"golang.org/x/sys/unix"
)
// *LockFile represents a file lock where the file is used to cache an
// identifier of the last party that made changes to whatever's being protected
// by the lock.
//
// It MUST NOT be created manually. Use GetLockFile or GetROLockFile instead.
type LockFile struct {
// The following fields are only set when constructing *LockFile, and must never be modified afterwards.
// They are safe to access without any other locking.
file string
ro bool
// rwMutex serializes concurrent reader-writer acquisitions in the same process space
rwMutex *sync.RWMutex
// stateMutex is used to synchronize concurrent accesses to the state below
stateMutex *sync.Mutex
counter int64
lw LastWrite // A global value valid as of the last .Touch() or .Modified()
locktype int16
locked bool
// The following fields are only modified on transitions between counter == 0 / counter != 0.
// Thus, they can be safely accessed by users _that currently hold the LockFile_ without locking.
// In other cases, they need to be protected using stateMutex.
fd uintptr
}
// LastWrite is an opaque identifier of the last write to some *LockFile.
// It can be used by users of a *LockFile to determine if the lock indicates changes
// since the last check.
//
// Never construct a LastWrite manually; only accept it from *LockFile methods, and pass it back.
type LastWrite struct {
// Never modify fields of a LastWrite object; it has value semantics.
state []byte // Contents of the lock file.
}
const lastWriterIDSize = 64 // This must be the same as len(stringid.GenerateRandomID)
var lastWriterIDCounter uint64 // Private state for newLastWriterID
// newLastWrite returns a new "last write" ID.
// The value must be different on every call, and also differ from values
// generated by other processes.
func newLastWrite() LastWrite {
// The ID is (PID, time, per-process counter, random)
// PID + time represents both a unique process across reboots,
// and a specific time within the process; the per-process counter
// is an extra safeguard for in-process concurrency.
// The random part disambiguates across process namespaces
// (where PID values might collide), serves as a general-purpose
// extra safety, _and_ is used to pad the output to lastWriterIDSize,
// because other versions of this code exist and they don't work
// efficiently if the size of the value changes.
pid := os.Getpid()
tm := time.Now().UnixNano()
counter := atomic.AddUint64(&lastWriterIDCounter, 1)
res := make([]byte, lastWriterIDSize)
binary.LittleEndian.PutUint64(res[0:8], uint64(tm))
binary.LittleEndian.PutUint64(res[8:16], counter)
binary.LittleEndian.PutUint32(res[16:20], uint32(pid))
if n, err := cryptorand.Read(res[20:lastWriterIDSize]); err != nil || n != lastWriterIDSize-20 {
panic(err) // This shouldn't happen
}
return LastWrite{
state: res,
}
}
// newLastWriteFromData returns a LastWrite corresponding to data that came from a previous LastWrite.serialize
func newLastWriteFromData(serialized []byte) LastWrite {
if serialized == nil {
panic("newLastWriteFromData with nil data")
}
return LastWrite{
state: serialized,
}
}
// serialize returns bytes to write to the lock file to represent the specified write.
func (lw LastWrite) serialize() []byte {
if lw.state == nil {
panic("LastWrite.serialize on an uninitialized object")
}
return lw.state
}
// Equals returns true if lw matches other
func (lw LastWrite) equals(other LastWrite) bool {
if lw.state == nil {
panic("LastWrite.equals on an uninitialized object")
}
if other.state == nil {
panic("LastWrite.equals with an uninitialized counterparty")
}
return bytes.Equal(lw.state, other.state)
}
// openLock opens the file at path and returns the corresponding file
// descriptor. The path is opened either read-only or read-write,
// depending on the value of ro argument.
//
// openLock will create the file and its parent directories,
// if necessary.
func openLock(path string, ro bool) (fd int, err error) {
flags := unix.O_CLOEXEC | os.O_CREATE
if ro {
flags |= os.O_RDONLY
} else {
flags |= os.O_RDWR
}
fd, err = unix.Open(path, flags, 0o644)
if err == nil {
return fd, nil
}
// the directory of the lockfile seems to be removed, try to create it
if os.IsNotExist(err) {
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
return fd, fmt.Errorf("creating lock file directory: %w", err)
}
return openLock(path, ro)
}
return fd, &os.PathError{Op: "open", Path: path, Err: err}
}
// createLockFileForPath returns new *LockFile object, possibly (depending on the platform)
// working inter-process and associated with the specified path.
//
// This function will be called at most once for each path value within a single process.
//
// If ro, the lock is a read-write lock and the returned *LockFile should correspond to the
// “lock for reading” (shared) operation; otherwise, the lock is either an exclusive lock,
// or a read-write lock and *LockFile should correspond to the “lock for writing” (exclusive) operation.
//
// WARNING:
// - The lock may or MAY NOT be inter-process.
// - There may or MAY NOT be an actual object on the filesystem created for the specified path.
// - Even if ro, the lock MAY be exclusive.
func createLockFileForPath(path string, ro bool) (*LockFile, error) {
// Check if we can open the lock.
fd, err := openLock(path, ro)
if err != nil {
return nil, err
}
unix.Close(fd)
locktype := unix.F_WRLCK
if ro {
locktype = unix.F_RDLCK
}
return &LockFile{
file: path,
ro: ro,
rwMutex: &sync.RWMutex{},
stateMutex: &sync.Mutex{},
lw: newLastWrite(), // For compatibility, the first call of .Modified() will always report a change.
locktype: int16(locktype),
locked: false,
}, nil
}
// lock locks the lockfile via FCTNL(2) based on the specified type and
// command.
func (l *LockFile) lock(lType int16) {
lk := unix.Flock_t{
Type: lType,
Whence: int16(unix.SEEK_SET),
Start: 0,
Len: 0,
}
switch lType {
case unix.F_RDLCK:
l.rwMutex.RLock()
case unix.F_WRLCK:
l.rwMutex.Lock()
default:
panic(fmt.Sprintf("attempted to acquire a file lock of unrecognized type %d", lType))
}
l.stateMutex.Lock()
defer l.stateMutex.Unlock()
if l.counter == 0 {
// If we're the first reference on the lock, we need to open the file again.
fd, err := openLock(l.file, l.ro)
if err != nil {
panic(err)
}
l.fd = uintptr(fd)
// Optimization: only use the (expensive) fcntl syscall when
// the counter is 0. In this case, we're either the first
// reader lock or a writer lock.
for unix.FcntlFlock(l.fd, unix.F_SETLKW, &lk) != nil {
time.Sleep(10 * time.Millisecond)
}
}
l.locktype = lType
l.locked = true
l.counter++
}
// Lock locks the lockfile as a writer. Panic if the lock is a read-only one.
func (l *LockFile) Lock() {
if l.ro {
panic("can't take write lock on read-only lock file")
} else {
l.lock(unix.F_WRLCK)
}
}
// LockRead locks the lockfile as a reader.
func (l *LockFile) RLock() {
l.lock(unix.F_RDLCK)
}
// Unlock unlocks the lockfile.
func (l *LockFile) Unlock() {
l.stateMutex.Lock()
if !l.locked {
// Panic when unlocking an unlocked lock. That's a violation
// of the lock semantics and will reveal such.
panic("calling Unlock on unlocked lock")
}
l.counter--
if l.counter < 0 {
// Panic when the counter is negative. There is no way we can
// recover from a corrupted lock and we need to protect the
// storage from corruption.
panic(fmt.Sprintf("lock %q has been unlocked too often", l.file))
}
if l.counter == 0 {
// We should only release the lock when the counter is 0 to
// avoid releasing read-locks too early; a given process may
// acquire a read lock multiple times.
l.locked = false
// Close the file descriptor on the last unlock, releasing the
// file lock.
unix.Close(int(l.fd))
}
if l.locktype == unix.F_RDLCK {
l.rwMutex.RUnlock()
} else {
l.rwMutex.Unlock()
}
l.stateMutex.Unlock()
}
func (l *LockFile) AssertLocked() {
// DO NOT provide a variant that returns the value of l.locked.
//
// If the caller does not hold the lock, l.locked might nevertheless be true because another goroutine does hold it, and
// we cant tell the difference.
//
// Hence, this “AssertLocked” method, which exists only for sanity checks.
// Dont even bother with l.stateMutex: The caller is expected to hold the lock, and in that case l.locked is constant true
// with no possible writers.
// If the caller does not hold the lock, we are violating the locking/memory model anyway, and accessing the data
// without the lock is more efficient for callers, and potentially more visible to lock analysers for incorrect callers.
if !l.locked {
panic("internal error: lock is not held by the expected owner")
}
}
func (l *LockFile) AssertLockedForWriting() {
// DO NOT provide a variant that returns the current lock state.
//
// The same caveats as for AssertLocked apply equally.
l.AssertLocked()
// Like AssertLocked, dont even bother with l.stateMutex.
if l.locktype != unix.F_WRLCK {
panic("internal error: lock is not held for writing")
}
}
// GetLastWrite returns a LastWrite value corresponding to current state of the lock.
// This is typically called before (_not after_) loading the state when initializing a consumer
// of the data protected by the lock.
// During the lifetime of the consumer, the consumer should usually call ModifiedSince instead.
//
// The caller must hold the lock (for reading or writing).
func (l *LockFile) GetLastWrite() (LastWrite, error) {
l.AssertLocked()
contents := make([]byte, lastWriterIDSize)
n, err := unix.Pread(int(l.fd), contents, 0)
if err != nil {
return LastWrite{}, err
}
// It is important to handle the partial read case, because
// the initial size of the lock file is zero, which is a valid
// state (no writes yet)
contents = contents[:n]
return newLastWriteFromData(contents), nil
}
// RecordWrite updates the lock with a new LastWrite value, and returns the new value.
//
// If this function fails, the LastWriter value of the lock is indeterminate;
// the caller should keep using the previously-recorded LastWrite value,
// and possibly detecting its own modification as an external one:
//
// lw, err := state.lock.RecordWrite()
// if err != nil { /* fail */ }
// state.lastWrite = lw
//
// The caller must hold the lock for writing.
func (l *LockFile) RecordWrite() (LastWrite, error) {
l.AssertLockedForWriting()
lw := newLastWrite()
lockContents := lw.serialize()
n, err := unix.Pwrite(int(l.fd), lockContents, 0)
if err != nil {
return LastWrite{}, err
}
if n != len(lockContents) {
return LastWrite{}, unix.ENOSPC
}
return lw, nil
}
// ModifiedSince checks if the lock has been changed since a provided LastWrite value,
// and returns the one to record instead.
//
// If ModifiedSince reports no modification, the previous LastWrite value
// is still valid and can continue to be used.
//
// If this function fails, the LastWriter value of the lock is indeterminate;
// the caller should fail and keep using the previously-recorded LastWrite value,
// so that it continues failing until the situation is resolved. Similarly,
// it should only update the recorded LastWrite value after processing the update:
//
// lw2, modified, err := state.lock.ModifiedSince(state.lastWrite)
// if err != nil { /* fail */ }
// state.lastWrite = lw2
// if modified {
// if err := reload(); err != nil { /* fail */ }
// state.lastWrite = lw2
// }
//
// The caller must hold the lock (for reading or writing).
func (l *LockFile) ModifiedSince(previous LastWrite) (LastWrite, bool, error) {
l.AssertLocked()
currentLW, err := l.GetLastWrite()
if err != nil {
return LastWrite{}, false, err
}
modified := !previous.equals(currentLW)
return currentLW, modified, nil
}
// Touch updates the lock file with to record that the current lock holder has modified the lock-protected data.
//
// Deprecated: Use *LockFile.RecordWrite.
func (l *LockFile) Touch() error {
lw, err := l.RecordWrite()
if err != nil {
return err
}
l.stateMutex.Lock()
if !l.locked || (l.locktype != unix.F_WRLCK) {
panic("attempted to update last-writer in lockfile without the write lock")
}
defer l.stateMutex.Unlock()
l.lw = lw
return nil
}
// Modified indicates if the lockfile has been updated since the last time it
// was loaded.
// NOTE: Unlike ModifiedSince, this returns true the first time it is called on a *LockFile.
// Callers cannot, in general, rely on this, because that might have happened for some other
// owner of the same *LockFile who created it previously.
//
// Deprecated: Use *LockFile.ModifiedSince.
func (l *LockFile) Modified() (bool, error) {
l.stateMutex.Lock()
if !l.locked {
panic("attempted to check last-writer in lockfile without locking it first")
}
defer l.stateMutex.Unlock()
oldLW := l.lw
// Note that this is called with stateMutex held; thats fine because ModifiedSince doesnt need to lock it.
currentLW, modified, err := l.ModifiedSince(oldLW)
if err != nil {
return true, err
}
l.lw = currentLW
return modified, nil
}
// IsReadWriteLock indicates if the lock file is a read-write lock.
func (l *LockFile) IsReadWrite() bool {
return !l.ro
}
// TouchedSince indicates if the lock file has been touched since the specified time
func (l *LockFile) TouchedSince(when time.Time) bool {
st, err := system.Fstat(int(l.fd))
if err != nil {
return true
}
mtim := st.Mtim()
touched := time.Unix(mtim.Unix())
return when.Before(touched)
}