storage: strip env specific data during archive

This ensures the checksum is predictable, and not influenced by e.g.
different runtime configuration settings, or FS specific data.

Signed-off-by: Hidde Beydals <hello@hidde.co>
This commit is contained in:
Hidde Beydals 2021-07-30 15:56:54 +02:00
parent d0560e5dbe
commit 3ac39b6137
1 changed files with 12 additions and 1 deletions

View File

@ -161,7 +161,8 @@ func SourceIgnoreFilter(ps []gitignore.Pattern, domain []string) ArchiveFileFilt
}
// Archive atomically archives the given directory as a tarball to the given v1beta1.Artifact path, excluding
// directories and any ArchiveFileFilter matches.
// directories and any ArchiveFileFilter matches. While archiving, any environment specific data (for example,
// the user and group name) is stripped from file headers.
// If successful, it sets the checksum and last update time on the artifact.
func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, filter ArchiveFileFilter) (err error) {
if f, err := os.Stat(dir); os.IsNotExist(err) || !f.IsDir() {
@ -216,6 +217,16 @@ func (s *Storage) Archive(artifact *sourcev1.Artifact, dir string, filter Archiv
}
header.Name = relFilePath
// We want to remove any environment specific data as well, this
// ensures the checksum is purely content based.
header.Gid = 0
header.Uid = 0
header.Uname = ""
header.Gname = ""
header.ModTime = time.Time{}
header.AccessTime = time.Time{}
header.ChangeTime = time.Time{}
if err := tw.WriteHeader(header); err != nil {
return err
}