Merge pull request #59 from fluxcd/go-native-tar

This commit is contained in:
Hidde Beydals 2020-06-06 20:05:16 +02:00 committed by GitHub
commit 032875caca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 113 additions and 37 deletions

View File

@ -20,7 +20,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o source-c
FROM alpine:3.11
RUN apk add --no-cache ca-certificates tar tini 'git>=2.12.0' socat curl bash
RUN apk add --no-cache ca-certificates tini
COPY --from=builder /workspace/source-controller /usr/local/bin/

View File

@ -186,7 +186,7 @@ func (r *GitRepositoryReconciler) sync(ctx context.Context, repository sourcev1.
defer unlock()
// archive artifact and check integrity
err = r.Storage.Archive(artifact, tmpGit, true)
err = r.Storage.Archive(artifact, tmpGit)
if err != nil {
err = fmt.Errorf("storage archive error: %w", err)
return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err

View File

@ -17,16 +17,19 @@ limitations under the License.
package controllers
import (
"context"
"archive/tar"
"bufio"
"compress/gzip"
"crypto/sha1"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
sourcev1 "github.com/fluxcd/source-controller/api/v1alpha1"
@ -36,7 +39,7 @@ import (
const (
	// excludeFile is the name of the optional ignore file looked up in the
	// root of the directory being archived.
	excludeFile = ".sourceignore"

	// excludeVCS is a comma separated list of VCS specific files and
	// directories that are excluded from the archive.
	excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes"

	// defaultExcludes is a comma separated list of bare file extensions.
	defaultExcludes = "jpg,jpeg,gif,png,wmv,flv,tar.gz,zip"

	// excludeExt is a comma separated list of gitignore style glob patterns
	// for file extensions that are excluded from the archive. Every entry
	// must have the form "*.<ext>"; a pattern such as ".*flv" would only
	// match dot-prefixed names ending in "flv", not files with that extension.
	excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,*.flv,*.tar.gz,*.zip"
)
// Storage manages artifacts
@ -120,46 +123,76 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool {
// Archive creates a gzip compressed TAR archive at the artifact path from the
// contents of the given dir, leaving out VCS specific files and directories
// and anything matched by the exclude patterns (including those listed in the
// exclude file found in dir).
//
// NOTE(review): this span comes from a rendered diff. Lines of two versions of
// the function are interleaved below: one that shells out to tar/gzip and takes
// an integrityCheck flag, and one that uses archive/tar and compress/gzip
// directly. Presumably the shell-based lines are the removed ones — confirm
// against the actual file before relying on any single line.
func (s *Storage) Archive(artifact sourcev1.Artifact, dir string, integrityCheck bool) error {
ctx, cancel := context.WithTimeout(context.Background(), s.Timeout)
defer cancel()
var tarExcludes []string
if _, err := os.Stat(filepath.Join(dir, excludeFile)); !os.IsNotExist(err) {
tarExcludes = append(tarExcludes, "--exclude-file="+excludeFile)
} else {
tarExcludes = append(tarExcludes, fmt.Sprintf("--exclude=\\*.{%s}", defaultExcludes))
func (s *Storage) Archive(artifact sourcev1.Artifact, dir string) error {
// Fail early when the source directory cannot be stat'ed.
if _, err := os.Stat(dir); err != nil {
return err
}
for _, excl := range strings.Split(excludeVCS, ",") {
tarExcludes = append(tarExcludes, "--exclude="+excl)
}
cmd := fmt.Sprintf("cd %s && tar -c %s -f - . | gzip > %s", dir, strings.Join(tarExcludes, " "), artifact.Path)
command := exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
err := command.Run()
ps, err := loadExcludePatterns(dir)
if err != nil {
return fmt.Errorf("command '%s' failed: %w", cmd, err)
return err
}
matcher := gitignore.NewMatcher(ps)
if integrityCheck {
cmd = fmt.Sprintf("gunzip -t %s", artifact.Path)
command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
err = command.Run()
gzFile, err := os.Create(artifact.Path)
if err != nil {
return fmt.Errorf("gzip integrity check failed")
return err
}
defer gzFile.Close()
cmd = fmt.Sprintf("tar -tzf %s >/dev/null", artifact.Path)
command = exec.CommandContext(ctx, "/bin/sh", "-c", cmd)
err = command.Run()
// The writers are layered tar -> gzip -> file. The deferred Close calls run
// in reverse order of registration (tw, then gw, then gzFile) and their
// return values are discarded.
gw := gzip.NewWriter(gzFile)
defer gw.Close()
tw := tar.NewWriter(gw)
defer tw.Close()
// Walk dir and add every regular, non-excluded file to the archive. Any
// error returned from the callback becomes the result of Archive.
return filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("tar integrity check failed")
}
return err
}
// Ignore anything that is not a file (directories, symlinks)
if !fi.Mode().IsRegular() {
return nil
}
// Ignore excluded extensions and files
// NOTE(review): the walked path is split on "/" before matching; the second
// argument is presumably the matcher's "is directory" flag — confirm against
// the go-git gitignore API.
if matcher.Match(strings.Split(p, "/"), false) {
return nil
}
header, err := tar.FileInfoHeader(fi, p)
if err != nil {
return err
}
// The name needs to be modified to maintain directory structure
// as tar.FileInfoHeader only has access to the base name of the file.
// Ref: https://golang.org/src/archive/tar/common.go?#L626
// The path is only made relative to dir when dir is absolute; otherwise the
// walked path is used as the entry name unchanged.
relFilePath := p
if filepath.IsAbs(dir) {
relFilePath, err = filepath.Rel(dir, p)
if err != nil {
return err
}
}
header.Name = relFilePath
if err := tw.WriteHeader(header); err != nil {
return err
}
f, err := os.Open(p)
if err != nil {
return err
}
// Close the file explicitly on both paths rather than deferring, so the
// handle is released before the walk moves on to the next entry.
if _, err := io.Copy(tw, f); err != nil {
f.Close()
return err
}
return f.Close()
})
}
// WriteFile writes the given bytes to the artifact path if the checksum differs
func (s *Storage) WriteFile(artifact sourcev1.Artifact, data []byte) error {
sum := s.Checksum(data)
@ -207,3 +240,28 @@ func (s *Storage) Lock(artifact sourcev1.Artifact) (unlock func(), err error) {
mutex := lockedfile.MutexAt(lockFile)
return mutex.Lock()
}
// loadExcludePatterns returns the gitignore patterns that apply to dir: the
// VCS entries from excludeVCS, the extension globs from excludeExt, and, when
// an excludeFile exists in dir, every line of it that is neither blank nor a
// "#" comment. A missing excludeFile is not an error; any other failure to
// open or read it is returned and no patterns are handed back.
func loadExcludePatterns(dir string) ([]gitignore.Pattern, error) {
	// Domain passed to every pattern: the components of dir split on "/".
	path := strings.Split(dir, "/")

	var ps []gitignore.Pattern
	for _, p := range strings.Split(excludeVCS, ",") {
		ps = append(ps, gitignore.ParsePattern(p, path))
	}
	for _, p := range strings.Split(excludeExt, ",") {
		ps = append(ps, gitignore.ParsePattern(p, path))
	}

	f, err := os.Open(filepath.Join(dir, excludeFile))
	if err != nil {
		if os.IsNotExist(err) {
			// No exclude file: only the default patterns apply.
			return ps, nil
		}
		return nil, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		s := scanner.Text()
		if strings.HasPrefix(s, "#") || len(strings.TrimSpace(s)) == 0 {
			continue
		}
		ps = append(ps, gitignore.ParsePattern(s, path))
	}
	// Scan stops silently on read errors and over-long lines; surface them
	// instead of returning a truncated pattern list.
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return ps, nil
}

View File

@ -1,8 +1,8 @@
# Git Repositories
The `GitRepository` API defines a source for artifacts coming from Git. The
resource exposes the latest synchronized state from Git as an artifact in
an archive.
resource exposes the latest synchronized state from Git as an artifact in a
[gzip compressed TAR archive](#artifact).
## Specification
@ -112,6 +112,24 @@ const (
)
```
## Artifact
The `GitRepository` API defines a source for artifacts coming from Git. The
resource exposes the latest synchronized state from Git as an artifact in a
gzip compressed TAR archive (`<commit hash>.tar.gz`).
### Excluding files
Git files (`.git/`, `.gitignore`, `.gitmodules`, and `.gitattributes`) are
excluded from the archive by default, as well as some extensions (`.jpg, .jpeg,
.gif, .png, .wmv, .flv, .tar.gz, .zip`).
Excluding additional files from the archive is possible by adding a
`.sourceignore` file in the root of the repository. The `.sourceignore` file
follows [the `.gitignore` pattern
format](https://git-scm.com/docs/gitignore#_pattern_format). Pattern
entries may overrule default exclusions.
## Spec examples
Pull the master branch of a public repository every minute: