Merge pull request #59 from fluxcd/go-native-tar

This commit is contained in:
Hidde Beydals 2020-06-06 20:05:16 +02:00 committed by GitHub
commit 032875caca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 113 additions and 37 deletions

View File

@ -20,7 +20,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o source-c
FROM alpine:3.11 FROM alpine:3.11
RUN apk add --no-cache ca-certificates tar tini 'git>=2.12.0' socat curl bash RUN apk add --no-cache ca-certificates tini
COPY --from=builder /workspace/source-controller /usr/local/bin/ COPY --from=builder /workspace/source-controller /usr/local/bin/

View File

@ -186,7 +186,7 @@ func (r *GitRepositoryReconciler) sync(ctx context.Context, repository sourcev1.
defer unlock() defer unlock()
// archive artifact and check integrity // archive artifact and check integrity
err = r.Storage.Archive(artifact, tmpGit, true) err = r.Storage.Archive(artifact, tmpGit)
if err != nil { if err != nil {
err = fmt.Errorf("storage archive error: %w", err) err = fmt.Errorf("storage archive error: %w", err)
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err

View File

@ -17,16 +17,19 @@ limitations under the License.
package controllers package controllers
import ( import (
"context" "archive/tar"
"bufio"
"compress/gzip"
"crypto/sha1" "crypto/sha1"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"os" "os"
"os/exec"
"path/filepath" "path/filepath"
"strings" "strings"
"time" "time"
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
sourcev1 "github.com/fluxcd/source-controller/api/v1alpha1" sourcev1 "github.com/fluxcd/source-controller/api/v1alpha1"
@ -36,7 +39,7 @@ import (
const ( const (
excludeFile = ".sourceignore" excludeFile = ".sourceignore"
excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes" excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes"
defaultExcludes = "jpg,jpeg,gif,png,wmv,flv,tar.gz,zip" excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,.*flv,.*tar.gz,*.zip"
) )
// Storage manages artifacts // Storage manages artifacts
@ -120,46 +123,76 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool {
// Archive creates a tar.gz to the artifact path from the given dir excluding any VCS specific // Archive creates a tar.gz to the artifact path from the given dir excluding any VCS specific
// files and directories, or any of the excludes defined in the excludeFiles. // files and directories, or any of the excludes defined in the excludeFiles.
func (s *Storage) Archive(artifact sourcev1.Artifact, dir string, integrityCheck bool) error { func (s *Storage) Archive(artifact sourcev1.Artifact, dir string) error {
ctx, cancel := context.WithTimeout(context.Background(), s.Timeout) if _, err := os.Stat(dir); err != nil {
defer cancel() return err
var tarExcludes []string
if _, err := os.Stat(filepath.Join(dir, excludeFile)); !os.IsNotExist(err) {
tarExcludes = append(tarExcludes, "--exclude-file="+excludeFile)
} else {
tarExcludes = append(tarExcludes, fmt.Sprintf("--exclude=\\*.{%s}", defaultExcludes))
} }
for _, excl := range strings.Split(excludeVCS, ",") {
tarExcludes = append(tarExcludes, "--exclude="+excl)
}
cmd := fmt.Sprintf("cd %s && tar -c %s -f - . | gzip > %s", dir, strings.Join(tarExcludes, " "), artifact.Path)
command := exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
err := command.Run() ps, err := loadExcludePatterns(dir)
if err != nil { if err != nil {
return fmt.Errorf("command '%s' failed: %w", cmd, err) return err
} }
matcher := gitignore.NewMatcher(ps)
if integrityCheck { gzFile, err := os.Create(artifact.Path)
cmd = fmt.Sprintf("gunzip -t %s", artifact.Path)
command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
err = command.Run()
if err != nil { if err != nil {
return fmt.Errorf("gzip integrity check failed") return err
} }
defer gzFile.Close()
cmd = fmt.Sprintf("tar -tzf %s >/dev/null", artifact.Path) gw := gzip.NewWriter(gzFile)
command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd) defer gw.Close()
err = command.Run()
tw := tar.NewWriter(gw)
defer tw.Close()
return filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error {
if err != nil { if err != nil {
return fmt.Errorf("tar integrity check failed") return err
}
} }
// Ignore anything that is not a file (directories, symlinks)
if !fi.Mode().IsRegular() {
return nil return nil
} }
// Ignore excluded extensions and files
if matcher.Match(strings.Split(p, "/"), false) {
return nil
}
header, err := tar.FileInfoHeader(fi, p)
if err != nil {
return err
}
// The name needs to be modified to maintain directory structure
// as tar.FileInfoHeader only has access to the base name of the file.
// Ref: https://golang.org/src/archive/tar/common.go?#L626
relFilePath := p
if filepath.IsAbs(dir) {
relFilePath, err = filepath.Rel(dir, p)
if err != nil {
return err
}
}
header.Name = relFilePath
if err := tw.WriteHeader(header); err != nil {
return err
}
f, err := os.Open(p)
if err != nil {
return err
}
if _, err := io.Copy(tw, f); err != nil {
f.Close()
return err
}
return f.Close()
})
}
// WriteFile writes the given bytes to the artifact path if the checksum differs // WriteFile writes the given bytes to the artifact path if the checksum differs
func (s *Storage) WriteFile(artifact sourcev1.Artifact, data []byte) error { func (s *Storage) WriteFile(artifact sourcev1.Artifact, data []byte) error {
sum := s.Checksum(data) sum := s.Checksum(data)
@ -207,3 +240,28 @@ func (s *Storage) Lock(artifact sourcev1.Artifact) (unlock func(), err error) {
mutex := lockedfile.MutexAt(lockFile) mutex := lockedfile.MutexAt(lockFile)
return mutex.Lock() return mutex.Lock()
} }
func loadExcludePatterns(dir string) ([]gitignore.Pattern, error) {
path := strings.Split(dir, "/")
var ps []gitignore.Pattern
for _, p := range strings.Split(excludeVCS, ",") {
ps = append(ps, gitignore.ParsePattern(p, path))
}
for _, p := range strings.Split(excludeExt, ",") {
ps = append(ps, gitignore.ParsePattern(p, path))
}
if f, err := os.Open(filepath.Join(dir, excludeFile)); err == nil {
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
s := scanner.Text()
if !strings.HasPrefix(s, "#") && len(strings.TrimSpace(s)) > 0 {
ps = append(ps, gitignore.ParsePattern(s, path))
}
}
} else if !os.IsNotExist(err) {
return nil, err
}
return ps, nil
}

View File

@ -1,8 +1,8 @@
# Git Repositories # Git Repositories
The `GitRepository` API defines a source for artifacts coming from Git. The The `GitRepository` API defines a source for artifacts coming from Git. The
resource exposes the latest synchronized state from Git as an artifact in resource exposes the latest synchronized state from Git as an artifact in a
an archive. [gzip compressed TAR archive](#artifact).
## Specification ## Specification
@ -112,6 +112,24 @@ const (
) )
``` ```
## Artifact
The `GitRepository` API defines a source for artifacts coming from Git. The
resource exposes the latest synchronized state from Git as an artifact in a
gzip compressed TAR archive (`<commit hash>.tar.gz`).
### Excluding files
Git files (`.git/`, `.gitignore`, `.gitmodules`, and `.gitattributes`) are
excluded from the archive by default, as are files with certain extensions
(`.jpg`, `.jpeg`, `.gif`, `.png`, `.wmv`, `.flv`, `.tar.gz`, and `.zip`).
Additional files can be excluded from the archive by adding a
`.sourceignore` file in the root of the repository. The `.sourceignore` file
follows [the `.gitignore` pattern
format](https://git-scm.com/docs/gitignore#_pattern_format); its pattern
entries may override the default exclusions.
## Spec examples ## Spec examples
Pull the master branch of a public repository every minute: Pull the master branch of a public repository every minute: