From b1b1dbcec536fe32d6804f9acf7e5ed462c6470e Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Tue, 7 Jul 2020 14:55:23 +0000 Subject: [PATCH 1/2] Support programming excluded patterns in gitrepository spec -- More coming in this commit message soon Signed-off-by: Erik Hollensbe --- api/v1alpha1/gitrepository_types.go | 7 +++ api/v1alpha1/zz_generated.deepcopy.go | 5 ++ .../source.fluxcd.io_gitrepositories.yaml | 6 +++ controllers/gitrepository_controller.go | 3 +- controllers/storage.go | 54 +++++++++++++------ docs/api/source.md | 30 +++++++++++ 6 files changed, 86 insertions(+), 19 deletions(-) diff --git a/api/v1alpha1/gitrepository_types.go b/api/v1alpha1/gitrepository_types.go index aa15bd9d..ac05b048 100644 --- a/api/v1alpha1/gitrepository_types.go +++ b/api/v1alpha1/gitrepository_types.go @@ -54,6 +54,13 @@ type GitRepositorySpec struct { // Verify OpenPGP signature for the commit that HEAD points to. // +optional Verification *GitRepositoryVerification `json:"verify,omitempty"` + + // SourceIgnore overrides the set of excluded patterns in the .sourceignore + // format (which is the same as .gitignore). If not provided, a default will + // be used, consult the documentation for your version to find out what those + // are. + // +optional + SourceIgnore *string `json:"sourceIgnore,omitempty"` } // GitRepositoryRef defines the git ref used for pull and checkout operations. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index f97d00a3..dc1f2393 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -140,6 +140,11 @@ func (in *GitRepositorySpec) DeepCopyInto(out *GitRepositorySpec) { *out = new(GitRepositoryVerification) **out = **in } + if in.SourceIgnore != nil { + in, out := &in.SourceIgnore, &out.SourceIgnore + *out = new(string) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitRepositorySpec. diff --git a/config/crd/bases/source.fluxcd.io_gitrepositories.yaml b/config/crd/bases/source.fluxcd.io_gitrepositories.yaml index 04954473..fb4fc258 100644 --- a/config/crd/bases/source.fluxcd.io_gitrepositories.yaml +++ b/config/crd/bases/source.fluxcd.io_gitrepositories.yaml @@ -82,6 +82,12 @@ spec: TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + sourceIgnore: + description: SourceIgnore overrides the set of excluded patterns in + the .sourceignore format (which is the same as .gitignore). If not + provided, a default will be used, consult the documentation for your + version to find out what those are. + type: string timeout: description: The timeout for remote git operations like cloning, default to 20s. diff --git a/controllers/gitrepository_controller.go b/controllers/gitrepository_controller.go index cb5c4761..b904cd01 100644 --- a/controllers/gitrepository_controller.go +++ b/controllers/gitrepository_controller.go @@ -198,8 +198,7 @@ func (r *GitRepositoryReconciler) sync(ctx context.Context, repository sourcev1. defer unlock() // archive artifact and check integrity - err = r.Storage.Archive(artifact, tmpGit) - if err != nil { + if err := r.Storage.Archive(artifact, tmpGit, repository.Spec); err != nil { err = fmt.Errorf("storage archive error: %w", err) return sourcev1.GitRepositoryNotReady(repository, sourcev1.StorageOperationFailedReason, err.Error()), err } diff --git a/controllers/storage.go b/controllers/storage.go index 7838ecd7..599a8d1f 100644 --- a/controllers/storage.go +++ b/controllers/storage.go @@ -19,6 +19,7 @@ package controllers import ( "archive/tar" "bufio" + "bytes" "compress/gzip" "crypto/sha1" "fmt" @@ -108,7 +109,7 @@ func (s *Storage) RemoveAllButCurrent(artifact sourcev1.Artifact) error { }) if len(errors) > 0 { - return fmt.Errorf("faild to remove files: %s", strings.Join(errors, " ")) + return fmt.Errorf("failed to remove files: %s", strings.Join(errors, " ")) } return nil } @@ -123,15 +124,17 @@ func (s *Storage) ArtifactExist(artifact sourcev1.Artifact) bool { // Archive creates a tar.gz to the artifact path from the given dir excluding any VCS specific // files and directories, or any of the excludes defined in the excludeFiles. -func (s *Storage) Archive(artifact sourcev1.Artifact, dir string) error { +// Returns a modified sourcev1.Artifact and any error. +func (s *Storage) Archive(artifact sourcev1.Artifact, dir string, spec sourcev1.GitRepositorySpec) error { if _, err := os.Stat(dir); err != nil { return err } - ps, err := loadExcludePatterns(dir) + ps, err := loadExcludePatterns(dir, spec) if err != nil { return err } + matcher := gitignore.NewMatcher(ps) gzFile, err := os.Create(artifact.Path) @@ -241,27 +244,44 @@ func (s *Storage) Lock(artifact sourcev1.Artifact) (unlock func(), err error) { return mutex.Lock() } -func loadExcludePatterns(dir string) ([]gitignore.Pattern, error) { +func getPatterns(reader io.Reader, path []string) []gitignore.Pattern { + ps := []gitignore.Pattern{} + scanner := bufio.NewScanner(reader) + + for scanner.Scan() { + s := scanner.Text() + if !strings.HasPrefix(s, "#") && len(strings.TrimSpace(s)) > 0 { + ps = append(ps, gitignore.ParsePattern(s, path)) + } + } + + return ps +} + +// loadExcludePatterns loads the excluded patterns from sourceignore or other +// sources. +func loadExcludePatterns(dir string, spec sourcev1.GitRepositorySpec) ([]gitignore.Pattern, error) { path := strings.Split(dir, "/") + var ps []gitignore.Pattern for _, p := range strings.Split(excludeVCS, ",") { ps = append(ps, gitignore.ParsePattern(p, path)) } - for _, p := range strings.Split(excludeExt, ",") { - ps = append(ps, gitignore.ParsePattern(p, path)) - } - if f, err := os.Open(filepath.Join(dir, excludeFile)); err == nil { - defer f.Close() - scanner := bufio.NewScanner(f) - for scanner.Scan() { - s := scanner.Text() - if !strings.HasPrefix(s, "#") && len(strings.TrimSpace(s)) > 0 { - ps = append(ps, gitignore.ParsePattern(s, path)) - } + if spec.SourceIgnore == nil { + for _, p := range strings.Split(excludeExt, ",") { + ps = append(ps, gitignore.ParsePattern(p, path)) } - } else if !os.IsNotExist(err) { - return nil, err + + if f, err := os.Open(filepath.Join(dir, excludeFile)); err == nil { + defer f.Close() + ps = append(ps, getPatterns(f, path)...) + } else if !os.IsNotExist(err) { + return nil, err + } + } else { + ps = append(ps, getPatterns(bytes.NewBufferString(*spec.SourceIgnore), path)...) } + return ps, nil } diff --git a/docs/api/source.md b/docs/api/source.md index b80a9591..2bdf2f79 100644 --- a/docs/api/source.md +++ b/docs/api/source.md @@ -157,6 +157,21 @@ GitRepositoryVerification

Verify OpenPGP signature for the commit that HEAD points to.

+ + +sourceIgnore
+ +string + + + +(Optional) +

SourceIgnore overrides the set of excluded patterns in the .sourceignore +format (which is the same as .gitignore). If not provided, a default will +be used, consult the documentation for your version to find out what those +are.

+ + @@ -666,6 +681,21 @@ GitRepositoryVerification

Verify OpenPGP signature for the commit that HEAD points to.

+ + +sourceIgnore
+ +string + + + +(Optional) +

SourceIgnore overrides the set of excluded patterns in the .sourceignore +format (which is the same as .gitignore). If not provided, a default will +be used, consult the documentation for your version to find out what those +are.

+ + From a723b9e3e7e303808e38c814cdef25e87a4c14d6 Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Wed, 8 Jul 2020 16:33:44 +0000 Subject: [PATCH 2/2] Archive and storage tests Signed-off-by: Erik Hollensbe --- controllers/storage.go | 2 +- controllers/storage_test.go | 252 ++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 controllers/storage_test.go diff --git a/controllers/storage.go b/controllers/storage.go index 599a8d1f..db88b4b4 100644 --- a/controllers/storage.go +++ b/controllers/storage.go @@ -40,7 +40,7 @@ import ( const ( excludeFile = ".sourceignore" excludeVCS = ".git/,.gitignore,.gitmodules,.gitattributes" - excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,.*flv,.*tar.gz,*.zip" + excludeExt = "*.jpg,*.jpeg,*.gif,*.png,*.wmv,*.flv,*.tar.gz,*.zip" ) // Storage manages artifacts diff --git a/controllers/storage_test.go b/controllers/storage_test.go new file mode 100644 index 00000000..383478ec --- /dev/null +++ b/controllers/storage_test.go @@ -0,0 +1,252 @@ +package controllers + +import ( + "archive/tar" + "compress/gzip" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + sourcev1 "github.com/fluxcd/source-controller/api/v1alpha1" +) + +type ignoreMap map[string]bool + +var remoteRepository = "https://github.com/fluxcd/source-controller" + +func init() { + // if this remote repo ever gets in your way, this is an escape; just set + // this to the url you want to clone. Be the source you want to be. + s := os.Getenv("REMOTE_REPOSITORY") + if s != "" { + remoteRepository = s + } +} + +func createStoragePath() (string, error) { + return ioutil.TempDir("", "") +} + +func cleanupStoragePath(dir string) func() { + return func() { os.RemoveAll(dir) } +} + +func TestStorageConstructor(t *testing.T) { + dir, err := createStoragePath() + if err != nil { + t.Fatal(err) + } + t.Cleanup(cleanupStoragePath(dir)) + + if _, err := NewStorage("/nonexistent", "hostname", time.Minute); err == nil { + t.Fatal("nonexistent path was allowable in storage constructor") + } + + f, err := ioutil.TempFile(dir, "") + if err != nil { + t.Fatalf("while creating temporary file: %v", err) + } + f.Close() + + if _, err := NewStorage(f.Name(), "hostname", time.Minute); err == nil { + t.Fatal("file path was accepted as basedir") + } + + os.Remove(f.Name()) + + if _, err := NewStorage(dir, "hostname", time.Minute); err != nil { + t.Fatalf("Valid path did not successfully return: %v", err) + } +} + +func artifactFromURLRepository(repo string) sourcev1.Artifact { + f, err := ioutil.TempFile("", "") + if err != nil { + panic(fmt.Errorf("could not create temporary file: %w", err)) + } + f.Close() + os.Remove(f.Name()) + + return sourcev1.Artifact{Path: f.Name(), URL: repo} +} + +// walks a tar.gz and looks for paths with the basename. It does not match +// symlinks properly at this time because that's painful. +func walkTar(tarFile string, match string) (bool, error) { + f, err := os.Open(tarFile) + if err != nil { + return false, fmt.Errorf("could not open file: %w", err) + } + defer f.Close() + + gzr, err := gzip.NewReader(f) + if err != nil { + return false, fmt.Errorf("could not unzip file: %w", err) + } + defer gzr.Close() + + tr := tar.NewReader(gzr) + for { + header, err := tr.Next() + if err == io.EOF { + break + } else if err != nil { + return false, fmt.Errorf("Corrupt tarball reading header: %w", err) + } + + switch header.Typeflag { + case tar.TypeDir, tar.TypeReg: + if filepath.Base(header.Name) == match { + return true, nil + } + default: + // skip + } + } + + return false, nil +} + +func testPatterns(t *testing.T, artifact sourcev1.Artifact, table ignoreMap) { + for name, expected := range table { + res, err := walkTar(artifact.Path, name) + if err != nil { + t.Fatalf("while reading tarball: %v", err) + } + + if res != expected { + if expected { + t.Fatalf("Could not find repository file matching %q in tarball for repo %q", name, remoteRepository) + } else { + t.Fatalf("Repository contained ignored file %q in tarball for repo %q", name, remoteRepository) + } + } + } +} + +func createArchive(t *testing.T, filenames []string, sourceIgnore string, spec sourcev1.GitRepositorySpec) sourcev1.Artifact { + dir, err := createStoragePath() + if err != nil { + t.Fatal(err) + } + t.Cleanup(cleanupStoragePath(dir)) + + storage, err := NewStorage(dir, "hostname", time.Minute) + if err != nil { + t.Fatalf("Error while bootstrapping storage: %v", err) + } + + gitDir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatalf("could not create temporary directory: %v", err) + } + t.Cleanup(func() { os.RemoveAll(gitDir) }) + + if err := exec.Command("git", "clone", remoteRepository, gitDir).Run(); err != nil { + t.Fatalf("Could not clone remote repository: %v", err) + } + + // inject files.. just empty files + for _, name := range filenames { + f, err := os.Create(filepath.Join(gitDir, name)) + if err != nil { + t.Fatalf("Could not inject filename %q: %v", name, err) + } + f.Close() + } + + // inject sourceignore if not empty + if sourceIgnore != "" { + si, err := os.Create(filepath.Join(gitDir, ".sourceignore")) + if err != nil { + t.Fatalf("Could not create .sourceignore: %v", err) + } + + if _, err := io.WriteString(si, sourceIgnore); err != nil { + t.Fatalf("Could not write to .sourceignore: %v", err) + } + + si.Close() + } + artifact := artifactFromURLRepository(remoteRepository) + + if err := storage.Archive(artifact, gitDir, spec); err != nil { + t.Fatalf("basic archive case failed: %v", err) + } + + if !storage.ArtifactExist(artifact) { + t.Fatalf("artifact was created but does not exist: %+v", artifact) + } + + return artifact +} + +func stringPtr(s string) *string { + return &s +} + +func TestArchiveBasic(t *testing.T) { + table := ignoreMap{ + "README.md": true, + ".gitignore": false, + } + + testPatterns(t, createArchive(t, []string{"README.md", ".gitignore"}, "", sourcev1.GitRepositorySpec{}), table) +} + +func TestArchiveIgnore(t *testing.T) { + // this is a list of files that will be created in the repository for each + // subtest. it is manipulated later on. + filenames := []string{ + "foo.tar.gz", + "bar.jpg", + "bar.gif", + "foo.jpeg", + "video.flv", + "video.wmv", + "bar.png", + "foo.zip", + } + + // this is the table of ignored files and their values. true means that it's + // present in the resulting tarball. + table := ignoreMap{} + for _, item := range filenames { + table[item] = false + } + + t.Run("automatically ignored files", func(t *testing.T) { + testPatterns(t, createArchive(t, filenames, "", sourcev1.GitRepositorySpec{}), table) + }) + + table = ignoreMap{} + for _, item := range filenames { + table[item] = true + } + + t.Run("only vcs ignored files", func(t *testing.T) { + testPatterns(t, createArchive(t, filenames, "", sourcev1.GitRepositorySpec{SourceIgnore: stringPtr("")}), table) + }) + + filenames = append(filenames, "test.txt") + table["test.txt"] = false + sourceIgnoreFile := "*.txt" + + t.Run("sourceignore injected via CRD", func(t *testing.T) { + testPatterns(t, createArchive(t, filenames, "", sourcev1.GitRepositorySpec{SourceIgnore: stringPtr(sourceIgnoreFile)}), table) + }) + + table = ignoreMap{} + for _, item := range filenames { + table[item] = false + } + + t.Run("sourceignore injected via filename", func(t *testing.T) { + testPatterns(t, createArchive(t, filenames, sourceIgnoreFile, sourcev1.GitRepositorySpec{}), table) + }) +}