diff --git a/Dockerfile b/Dockerfile index 1b61aa0f..b9162284 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o source-c FROM alpine:3.11 -RUN apk add --no-cache ca-certificates tar tini 'git>=2.12.0' socat curl bash +RUN apk add --no-cache ca-certificates tini COPY --from=builder /workspace/source-controller /usr/local/bin/ diff --git a/controllers/gitrepository_controller.go b/controllers/gitrepository_controller.go index ea01f9a4..a47b1811 100644 --- a/controllers/gitrepository_controller.go +++ b/controllers/gitrepository_controller.go @@ -186,7 +186,7 @@ func (r *GitRepositoryReconciler) sync(ctx context.Context, repository sourcev1. defer unlock() // archive artifact and check integrity - err = r.Storage.Archive(artifact, tmpGit, true) + err = r.Storage.Archive(artifact, tmpGit) if err != nil { err = fmt.Errorf("storage archive error: %w", err) return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err diff --git a/controllers/storage.go b/controllers/storage.go index 5b751698..b5b6edbf 100644 --- a/controllers/storage.go +++ b/controllers/storage.go @@ -17,16 +17,19 @@ limitations under the License. 
package controllers import ( - "context" + "archive/tar" + "bufio" + "compress/gzip" "crypto/sha1" "fmt" + "io" "io/ioutil" "os" - "os/exec" "path/filepath" "strings" "time" + "github.com/go-git/go-git/v5/plumbing/format/gitignore" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" sourcev1 "github.com/fluxcd/source-controller/api/v1alpha1" @@ -34,9 +37,9 @@ import ( ) const ( - excludeFile = ".sourceignore" - excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes" - defaultExcludes = "jpg,jpeg,gif,png,wmv,flv,tar.gz,zip" + excludeFile = ".sourceignore" + excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes" + excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,*.flv,*.tar.gz,*.zip" ) // Storage manages artifacts @@ -120,44 +123,85 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool { // Archive creates a tar.gz to the artifact path from the given dir excluding any VCS specific // files and directories, or any of the excludes defined in the excludeFiles. -func (s *Storage) Archive(artifact sourcev1.Artifact, dir string, integrityCheck bool) error { - ctx, cancel := context.WithTimeout(context.Background(), s.Timeout) - defer cancel() - - var tarExcludes []string - if _, err := os.Stat(filepath.Join(dir, excludeFile)); !os.IsNotExist(err) { - tarExcludes = append(tarExcludes, "--exclude-file="+excludeFile) - } else { - tarExcludes = append(tarExcludes, fmt.Sprintf("--exclude=\\*.{%s}", defaultExcludes)) +func (s *Storage) Archive(artifact sourcev1.Artifact, dir string) error { + if _, err := os.Stat(dir); err != nil { + return err } - for _, excl := range strings.Split(excludeVCS, ",") { - tarExcludes = append(tarExcludes, "--exclude="+excl) - } - cmd := fmt.Sprintf("cd %s && tar -c %s -f - . 
| gzip > %s", dir, strings.Join(tarExcludes, " "), artifact.Path) - command := exec.CommandContext(ctx, "/bin/sh", "-c", cmd) - err := command.Run() + ps, err := loadExcludePatterns(dir) if err != nil { - return fmt.Errorf("command '%s' failed: %w", cmd, err) + return err } + matcher := gitignore.NewMatcher(ps) - if integrityCheck { - cmd = fmt.Sprintf("gunzip -t %s", artifact.Path) - command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd) - err = command.Run() + gzFile, err := os.Create(artifact.Path) + if err != nil { + return err + } + defer gzFile.Close() + + gw := gzip.NewWriter(gzFile) + tw := tar.NewWriter(gw) + + err = filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error { if err != nil { - return fmt.Errorf("gzip integrity check failed") + return err } - cmd = fmt.Sprintf("tar -tzf %s >/dev/null", artifact.Path) - command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd) - err = command.Run() - if err != nil { - return fmt.Errorf("tar integrity check failed") + // Ignore anything that is not a file (directories, symlinks) + if !fi.Mode().IsRegular() { + return nil } - } - return nil + // Ignore excluded extensions and files + if matcher.Match(strings.Split(p, "/"), false) { + return nil + } + + header, err := tar.FileInfoHeader(fi, p) + if err != nil { + return err + } + // The name needs to be modified to maintain directory structure + // as tar.FileInfoHeader only has access to the base name of the file. 
+ // Ref: https://golang.org/src/archive/tar/common.go?#L626 + relFilePath := p + if filepath.IsAbs(dir) { + relFilePath, err = filepath.Rel(dir, p) + if err != nil { + return err + } + } + header.Name = relFilePath + + if err := tw.WriteHeader(header); err != nil { + return err + } + + f, err := os.Open(p) + if err != nil { + return err + } + if _, err := io.Copy(tw, f); err != nil { + f.Close() + return err + } + return f.Close() + }) + if err != nil { + return err + } + + // Close the writers explicitly, innermost first: tar.Writer.Close writes + // the archive footer and gzip.Writer.Close flushes the compressed stream, + // so a failure in either means the artifact on disk is incomplete. + if err := tw.Close(); err != nil { + return err + } + if err := gw.Close(); err != nil { + return err + } + return gzFile.Close() } // WriteFile writes the given bytes to the artifact path if the checksum differs @@ -207,3 +251,35 @@ func (s *Storage) Lock(artifact sourcev1.Artifact) (unlock func(), err error) { mutex := lockedfile.MutexAt(lockFile) return mutex.Lock() } + +// loadExcludePatterns returns the gitignore patterns used to filter the +// archive for dir: the built-in VCS and extension excludes, followed by any +// non-comment, non-blank line of the excludeFile found in the root of dir. +// A missing excludeFile is not an error. +func loadExcludePatterns(dir string) ([]gitignore.Pattern, error) { + path := strings.Split(dir, "/") + var ps []gitignore.Pattern + for _, p := range strings.Split(excludeVCS, ",") { + ps = append(ps, gitignore.ParsePattern(p, path)) + } + for _, p := range strings.Split(excludeExt, ",") { + ps = append(ps, gitignore.ParsePattern(p, path)) + } + if f, err := os.Open(filepath.Join(dir, excludeFile)); err == nil { + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + s := scanner.Text() + if !strings.HasPrefix(s, "#") && len(strings.TrimSpace(s)) > 0 { + ps = append(ps, gitignore.ParsePattern(s, path)) + } + } + if err := scanner.Err(); err != nil { + return nil, err + } + } else if !os.IsNotExist(err) { + return nil, err + } + return ps, nil +} diff --git a/docs/spec/v1alpha1/gitrepositories.md b/docs/spec/v1alpha1/gitrepositories.md index 3db341ce..208c3b2c 100644 --- a/docs/spec/v1alpha1/gitrepositories.md +++ b/docs/spec/v1alpha1/gitrepositories.md @@ -1,8 +1,8 @@ # Git Repositories The `GitRepository` API defines a source for artifacts coming from Git. The -resource exposes the latest synchronized state from Git as an artifact in -an archive. +resource exposes the latest synchronized state from Git as an artifact in a +[gzip compressed TAR archive](#artifact). 
## Specification @@ -112,6 +112,24 @@ const ( ) ``` +## Artifact + +The `GitRepository` API defines a source for artifacts coming from Git. The +resource exposes the latest synchronized state from Git as an artifact in a +gzip compressed TAR archive (`.tar.gz`). + +### Excluding files + +Git files (`.git/`, `.gitignore`, `.gitmodules`, and `.gitattributes`) are +excluded from the archive by default, as well as some extensions (`.jpg, .jpeg, +.gif, .png, .wmv, .flv, .tar.gz, .zip`). + +Excluding additional files from the archive is possible by adding a +`.sourceignore` file in the root of the repository. The `.sourceignore` file +follows [the `.gitignore` pattern +format](https://git-scm.com/docs/gitignore#_pattern_format). Pattern +entries may overrule default exclusions. + ## Spec examples Pull the master branch of a public repository every minute: