Avoid concurrent write corruption to /etc/hosts

When multiple writers race to update /etc/hosts, the file can end up
corrupted, containing an interleaved mix of the competing writers' contents.

We can't use a traditional atomic file write, because we are bind-mounting /etc/hosts.
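
For contrast, here is a minimal sketch of that traditional pattern (a hypothetical
atomicWriteViaRename helper, similar in spirit to the atomicWriteFile function visible
at the end of the diff below; it assumes the "io/ioutil", "os" and "path/filepath"
imports already in this file). The final rename is the step that breaks: /etc/hosts is
itself a bind mount inside the container, so renaming over it typically fails with
EBUSY, and even where the rename succeeded it would swap in a new inode that the
existing mount no longer points at.

func atomicWriteViaRename(p string, data []byte, mode os.FileMode) error {
	// Write the new contents to a temp file in the same directory...
	tmp, err := ioutil.TempFile(filepath.Dir(p), ".hosts-")
	if err != nil {
		return err
	}
	defer os.Remove(tmp.Name()) // best-effort cleanup if we fail before the rename

	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		return err
	}
	if err := tmp.Close(); err != nil {
		return err
	}
	if err := os.Chmod(tmp.Name(), mode); err != nil {
		return err
	}
	// ...then atomically swap it into place. This rename is what does not
	// work when p is a bind-mounted file such as /etc/hosts.
	return os.Rename(tmp.Name(), p)
}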

Instead we write to /etc/hosts, pause for a random interval, then
re-read the contents.  If the contents don't match what we wrote, we
repeat.  This will not result in fair queuing, but it will avoid
corruption.
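
As a rough illustration of the intended behaviour, a hypothetical test (not part of
this commit) could race two writers with different payloads through the
pseudoAtomicWrite function added in the diff below; the file should end up holding
exactly one complete payload, never an interleaving of the two. It assumes the
standard "bytes", "io/ioutil", "os", "sync" and "testing" imports.

func TestConcurrentHostsWrites(t *testing.T) {
	f, err := ioutil.TempFile("", "hosts")
	if err != nil {
		t.Fatal(err)
	}
	f.Close()
	defer os.Remove(f.Name())

	payloads := [][]byte{
		[]byte("10.0.0.1 a.internal\n"),
		[]byte("10.0.0.2 b.internal\n"),
	}

	var wg sync.WaitGroup
	for _, p := range payloads {
		wg.Add(1)
		go func(b []byte) {
			defer wg.Done()
			// Each writer retries until its own contents survive the re-read.
			if err := pseudoAtomicWrite(f.Name(), b, 0644); err != nil {
				t.Errorf("write failed: %v", err)
			}
		}(p)
	}
	wg.Wait()

	got, err := ioutil.ReadFile(f.Name())
	if err != nil {
		t.Fatal(err)
	}
	// One writer wins (which one is not deterministic), but the result is
	// never a mix of the two payloads.
	if !bytes.Equal(got, payloads[0]) && !bytes.Equal(got, payloads[1]) {
		t.Fatalf("unexpected interleaved contents: %q", got)
	}
}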
Justin SB 2019-05-06 19:00:52 -04:00
parent 68e975cc74
commit feec6820fe
GPG Key ID: 8DEC5C8217494E37
1 changed file with 40 additions and 3 deletions

@@ -20,11 +20,13 @@ import (
"bytes"
"fmt"
"io/ioutil"
math_rand "math/rand"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"k8s.io/klog"
)
@@ -113,15 +115,50 @@ func UpdateHostsFileWithRecords(p string, addrToHosts map[string][]string) error
// Note that because we are bind mounting /etc/hosts, we can't do a normal atomic file write
// (where we write a temp file and rename it)
// TODO: We should just hold the file open while we read & write it
-err = ioutil.WriteFile(p, updated, stat.Mode().Perm())
-if err != nil {
+if err := pseudoAtomicWrite(p, updated, stat.Mode()); err != nil {
return fmt.Errorf("error writing file %q: %v", p, err)
}
return nil
}
// Because we are bind-mounting /etc/hosts, we can't do a normal
// atomic file write (where we write a temp file and rename it);
// instead we write the file, pause, re-read and see if anyone else
// wrote in the meantime; if so we rewrite again. By pausing for a
// random amount of time, eventually we'll win the write race and
// exit. This doesn't guarantee fairness, but it should mean that the
// end-result is not malformed (i.e. partial writes).
func pseudoAtomicWrite(p string, b []byte, mode os.FileMode) error {
attempt := 0
for {
attempt++
if attempt > 10 {
return fmt.Errorf("failed to consistently write file %q - too many retries", p)
}
if err := ioutil.WriteFile(p, b, mode); err != nil {
klog.Warningf("error writing file %q: %v", p, err)
continue
}
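// Sleep for a random 1..20ms so that racing writers eventually
// de-synchronize, then re-read to check whether our write survived intact.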
n := 1 + math_rand.Intn(20)
time.Sleep(time.Duration(n) * time.Millisecond)
contents, err := ioutil.ReadFile(p)
if err != nil {
klog.Warningf("error re-reading file %q: %v", p, err)
continue
}
if bytes.Equal(contents, b) {
return nil
}
klog.Warningf("detected concurrent write to file %q, will retry", p)
}
}
func atomicWriteFile(filename string, data []byte, perm os.FileMode) error {
dir := filepath.Dir(filename)