events: no duplicates when streaming during a log rotation

When streaming events, prevent returning duplicates after a log rotation
by marking a beginning and an end for rotated events.  Before starting to
stream, get a timestamp while holding the event lock.  The timestamp
allows for detecting whether a rotation event happened while reading the
log file and to skip all events between the begin and end rotation
event.

In an ideal scenario, we could detect rotated events by enforcing a
chronological order when reading and skip those detected to not be more
recent than the last read event.  However, events are not always
_written_ in chronological order.  While this can be changed, existing
event files could not be read correctly anymore.

Fixes: #17665
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg 2023-03-03 09:42:30 +01:00
parent 68bf49799d
commit 408e764b94
3 changed files with 202 additions and 42 deletions

View File

@ -57,23 +57,9 @@ func (e EventLogFile) Write(ee Event) error {
return err
}
rotated, err := rotateLog(e.options.LogFilePath, eventJSONString, e.options.LogFileMaxSize)
if err != nil {
return fmt.Errorf("rotating log file: %w", err)
}
if rotated {
rEvent := NewEvent(Rotate)
rEvent.Type = System
rEvent.Name = e.options.LogFilePath
rotateJSONString, err := rEvent.ToJSONString()
if err != nil {
if _, err := rotateLog(e.options.LogFilePath, eventJSONString, e.options.LogFileMaxSize); err != nil {
return err
}
if err := e.writeString(rotateJSONString); err != nil {
return err
}
}
return e.writeString(eventJSONString)
}
@ -83,21 +69,42 @@ func (e EventLogFile) writeString(s string) error {
if err != nil {
return err
}
if _, err := f.WriteString(s + "\n"); err != nil {
return writeToFile(s, f)
}
func writeToFile(s string, f *os.File) error {
_, err := f.WriteString(s + "\n")
return err
}
return nil
}
func (e EventLogFile) getTail(options ReadOptions) (*tail.Tail, error) {
reopen := true
seek := tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}
if options.FromStart || !options.Stream {
seek.Whence = 0
reopen = false
}
stream := options.Stream
return tail.TailFile(e.options.LogFilePath, tail.Config{ReOpen: reopen, Follow: stream, Location: &seek, Logger: tail.DiscardingLogger, Poll: true})
return tail.TailFile(e.options.LogFilePath, tail.Config{ReOpen: stream, Follow: stream, Location: &seek, Logger: tail.DiscardingLogger, Poll: true})
}
func (e EventLogFile) readRotateEvent(event *Event) (begin bool, end bool, err error) {
if event.Status != Rotate {
return
}
if event.Details.Attributes == nil {
// may be an old event before storing attributes in the rotate event
return
}
switch event.Details.Attributes[rotateEventAttribute] {
case rotateEventBegin:
begin = true
return
case rotateEventEnd:
end = true
return
default:
err = fmt.Errorf("unknown rotate-event attribute %q", event.Details.Attributes[rotateEventAttribute])
return
}
}
// Reads from the log file
@ -125,8 +132,26 @@ func (e EventLogFile) Read(ctx context.Context, options ReadOptions) error {
}
logrus.Debugf("Reading events from file %q", e.options.LogFilePath)
// Get the time *before* starting to read. Comparing the timestamps
// with events avoids returning events more than once after a log-file
// rotation.
readTime, err := func() (time.Time, error) {
// We need to lock events file
lock, err := lockfile.GetLockFile(e.options.LogFilePath + ".lock")
if err != nil {
return time.Time{}, err
}
lock.Lock()
defer lock.Unlock()
return time.Now(), nil
}()
if err != nil {
return err
}
var line *tail.Line
var ok bool
var skipRotate bool
for {
select {
case <-ctx.Done():
@ -146,11 +171,30 @@ func (e EventLogFile) Read(ctx context.Context, options ReadOptions) error {
return err
}
switch event.Type {
case Image, Volume, Pod, System, Container, Network:
case Image, Volume, Pod, Container, Network:
// no-op
case System:
begin, end, err := e.readRotateEvent(event)
if err != nil {
return err
}
if begin && event.Time.After(readTime) {
// If the rotation event happened _after_ we
// started reading, we need to ignore/skip
// subsequent event until the end of the
// rotation.
skipRotate = true
logrus.Debugf("Skipping already read events after log-file rotation: %v", event)
} else if end {
// This rotate event
skipRotate = false
}
default:
return fmt.Errorf("event type %s is not valid in %s", event.Type.String(), e.options.LogFilePath)
}
if skipRotate {
continue
}
if applyFilters(event, filterMap) {
options.EventChannel <- event
}
@ -162,8 +206,43 @@ func (e EventLogFile) String() string {
return LogFile.String()
}
const (
rotateEventAttribute = "io.podman.event.rotate"
rotateEventBegin = "begin"
rotateEventEnd = "end"
)
func writeRotateEvent(f *os.File, logFilePath string, begin bool) error {
rEvent := NewEvent(Rotate)
rEvent.Type = System
rEvent.Name = logFilePath
rEvent.Attributes = make(map[string]string)
if begin {
rEvent.Attributes[rotateEventAttribute] = rotateEventBegin
} else {
rEvent.Attributes[rotateEventAttribute] = rotateEventEnd
}
rotateJSONString, err := rEvent.ToJSONString()
if err != nil {
return err
}
return writeToFile(rotateJSONString, f)
}
// Rotates the log file if the log file size and content exceeds limit
func rotateLog(logfile string, content string, limit uint64) (bool, error) {
needsRotation, err := logNeedsRotation(logfile, content, limit)
if err != nil || !needsRotation {
return false, err
}
if err := truncate(logfile); err != nil {
return false, err
}
return true, nil
}
// logNeedsRotation return true if the log file needs to be rotated.
func logNeedsRotation(logfile string, content string, limit uint64) (bool, error) {
if limit == 0 {
return false, nil
}
@ -181,9 +260,6 @@ func rotateLog(logfile string, content string, limit uint64) (bool, error) {
return false, nil
}
if err := truncate(logfile); err != nil {
return false, err
}
return true, nil
}
@ -224,9 +300,16 @@ func truncate(filePath string) error {
return err
}
}
if err := writeRotateEvent(tmp, filePath, true); err != nil {
return fmt.Errorf("writing rotation event begin marker: %w", err)
}
if _, err := reader.WriteTo(tmp); err != nil {
return fmt.Errorf("writing truncated contents: %w", err)
}
if err := writeRotateEvent(tmp, filePath, false); err != nil {
return fmt.Errorf("writing rotation event end marker: %w", err)
}
if err := renameLog(tmp.Name(), filePath); err != nil {
return fmt.Errorf("writing back %s to %s: %w", tmp.Name(), filePath, err)
@ -254,6 +337,12 @@ func renameLog(from, to string) error {
}
defer fFrom.Close()
// Remove the old file to make sure we're not truncating current
// readers.
if err := os.Remove(to); err != nil {
return fmt.Errorf("recreating file %s: %w", to, err)
}
fTo, err := os.Create(to)
if err != nil {
return err

View File

@ -2,6 +2,7 @@ package events
import (
"os"
"strings"
"testing"
"github.com/stretchr/testify/require"
@ -75,12 +76,6 @@ func TestTruncationOutput(t *testing.T) {
8
9
10
`
contentAfter := `6
7
8
9
10
`
// Create dummy file
tmp, err := os.CreateTemp("", "log-rotation")
@ -99,11 +94,14 @@ func TestTruncationOutput(t *testing.T) {
require.NoError(t, err)
afterTruncation, err := os.ReadFile(tmp.Name())
require.NoError(t, err)
// Test if rotation was successful
require.NoError(t, err, "Log content has changed")
// Content has changed
require.NotEqual(t, beforeTruncation, afterTruncation)
require.Equal(t, string(afterTruncation), contentAfter)
split := strings.Split(string(afterTruncation), "\n")
require.Len(t, split, 8) // 2 events + 5 rotated lines + last new line
require.Contains(t, split[0], "\"Attributes\":{\"io.podman.event.rotate\":\"begin\"}")
require.Equal(t, split[1:6], []string{"6", "7", "8", "9", "10"})
require.Contains(t, split[6], "\"Attributes\":{\"io.podman.event.rotate\":\"end\"}")
require.Contains(t, split[7], "")
}
func TestRenameLog(t *testing.T) {

View File

@ -209,7 +209,8 @@ EOF
expectedContentAfterTruncation=$PODMAN_TMPDIR/truncated.txt
run_podman create $IMAGE
CONTAINERS_CONF=$containersConf run_podman rm $output
ctrID=$output
CONTAINERS_CONF=$containersConf run_podman rm $ctrID
tail -n52 $eventsFile >> $expectedContentAfterTruncation
# Make sure the events file looks as expected.
@ -217,9 +218,81 @@ EOF
# Make sure that `podman events` can read the file, and that it returns the
# same amount of events. We checked the contents before.
CONTAINERS_CONF=$containersConf run_podman events --stream=false --since="2022-03-06T11:26:42.723667984+02:00"
assert "${#lines[@]}" = 51 "Number of events returned"
is "${lines[-2]}" ".* log-rotation $eventsFile"
CONTAINERS_CONF=$containersConf run_podman events --stream=false --since="2022-03-06T11:26:42.723667984+02:00" --format=json
assert "${#lines[@]}" = 52 "Number of events returned"
is "${lines[0]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"begin\"}}"
is "${lines[-2]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"end\"}}"
is "${lines[-1]}" "{\"ID\":\"$ctrID\",\"Image\":\"$IMAGE\",\"Name\":\".*\",\"Status\":\"remove\",\"Time\":\".*\",\"Type\":\"container\",\"Attributes\":{.*}}"
}
@test "events log-file no duplicates" {
skip_if_remote "setting CONTAINERS_CONF logger options does not affect remote client"
# This test makes sure that events are not returned more than once when
# streaming during a log-file rotation.
eventsFile=$PODMAN_TMPDIR/events.txt
eventsJSON=$PODMAN_TMPDIR/events.json
containersConf=$PODMAN_TMPDIR/containers.conf
cat >$containersConf <<EOF
[engine]
events_logger="file"
events_logfile_path="$eventsFile"
# The populated file has a size of 11300, so let's create a couple of events to
# force a log rotation.
events_logfile_max_size=11300
EOF
_populate_events_file $eventsFile
CONTAINERS_CONF=$containersConf timeout --kill=10 20 \
$PODMAN events --stream=true --since="2022-03-06T11:26:42.723667984+02:00" --format=json > $eventsJSON &
# Now wait for the above podman-events process to write to the eventsJSON
# file, so we know it's reading.
retries=20
while [[ $retries -gt 0 ]]; do
if [ -s $eventsJSON ]; then
break
fi
retries=$((retries - 1))
sleep 0.5
done
assert $retries -gt 0 \
"Timed out waiting for podman-events to start reading pre-existing events"
CONTAINERS_CONF=$containersConf run_podman create $IMAGE
ctrID=$output
CONTAINERS_CONF=$containersConf run_podman rm -f $ctrID
# Now wait for the last event above to be read by the `podman-events`
# process above.
retries=20
while [[ $retries -gt 0 ]]; do
run grep "\"Status\"\:\"remove\"," $eventsJSON
if [[ $status -eq 0 ]]; then
break
fi
retries=$((retries - 1))
sleep 0.5
done
test "$status" = 0 || die "Did not find expected 'Status:remove' line in log"
# Make sure that the log file has been rotated as expected.
run cat $eventsFile
assert "${#lines[@]}" = 54 "Number of events in $eventsFile" # 49 previous + 2 rotation + pull/create/rm
is "${lines[0]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"begin\"}}"
is "${lines[1]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.723667951+02:00\",\"Type\":\"image\",\"Attributes\":null}"
is "${lines[49]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.723667999+02:00\",\"Type\":\"image\",\"Attributes\":null}"
is "${lines[50]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"end\"}}"
is "${lines[53]}" "{\"ID\":\"$ctrID\",\"Image\":\"$IMAGE\",\"Name\":\".*\",\"Status\":\"remove\",\"Time\":\".*\",\"Type\":\"container\",\"Attributes\":{.*}}"
# Make sure that the JSON stream looks as expected. That means it has all
# events and no duplicates.
run cat $eventsJSON
is "${lines[0]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.7236679+02:00\",\"Type\":\"image\",\"Attributes\":null}"
is "${lines[99]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.723667999+02:00\",\"Type\":\"image\",\"Attributes\":null}"
is "${lines[100]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"end\"}}"
is "${lines[103]}" "{\"ID\":\"$ctrID\",\"Image\":\"$IMAGE\",\"Name\":\".*\",\"Status\":\"remove\",\"Time\":\".*\",\"Type\":\"container\",\"Attributes\":{.*}}"
}
# Prior to #15633, container labels would not appear in 'die' log events