From 61f9eb28fcaa9361d07b6ff6a5a304b9ace96beb Mon Sep 17 00:00:00 2001 From: Dipti Pai Date: Thu, 10 Apr 2025 15:05:31 -0700 Subject: [PATCH] Sparse Checkout Directories in GitRepositories. - Add `.spec.sparseCheckout` and `.status.observedSparseCheckout` fields to `GitRepository`. - Add controller support to send the sparse checkout directories to go-git via pkg methods. - Use `.status.observedSparseCheckout` to detect drift in configuration. - Trim leading "./" in directory paths. - Validate spec configuration by checking directories specified in spec exist in the cloned repository after successful checkout. - Add tests for testing the observed sparse checkout behavior. - Add docs describing the new fields. Signed-off-by: Dipti Pai --- api/v1/gitrepository_types.go | 11 +++++ api/v1/zz_generated.deepcopy.go | 10 +++++ ...rce.toolkit.fluxcd.io_gitrepositories.yaml | 15 +++++++ docs/api/v1/source.md | 41 ++++++++++++++++++ docs/spec/v1/gitrepositories.md | 43 +++++++++++++++++++ .../controller/gitrepository_controller.go | 42 +++++++++++++++++- .../gitrepository_controller_test.go | 32 ++++++++++++++ 7 files changed, 193 insertions(+), 1 deletion(-) diff --git a/api/v1/gitrepository_types.go b/api/v1/gitrepository_types.go index 20ef37d0..590f1a38 100644 --- a/api/v1/gitrepository_types.go +++ b/api/v1/gitrepository_types.go @@ -148,6 +148,12 @@ type GitRepositorySpec struct { // should be included in the Artifact produced for this GitRepository. // +optional Include []GitRepositoryInclude `json:"include,omitempty"` + + // SparseCheckout specifies a list of directories to checkout when cloning + // the repository. If specified, only these directories are included in the + // Artifact produced for this GitRepository. 
+ // +optional + SparseCheckout []string `json:"sparseCheckout,omitempty"` } // GitRepositoryInclude specifies a local reference to a GitRepository which @@ -266,6 +272,11 @@ type GitRepositoryStatus struct { // +optional ObservedInclude []GitRepositoryInclude `json:"observedInclude,omitempty"` + // ObservedSparseCheckout is the observed list of directories used to + // produce the current Artifact. + // +optional + ObservedSparseCheckout []string `json:"observedSparseCheckout,omitempty"` + // SourceVerificationMode is the last used verification mode indicating // which Git object(s) have been verified. // +optional diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 12e537fa..9ac5d593 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -347,6 +347,11 @@ func (in *GitRepositorySpec) DeepCopyInto(out *GitRepositorySpec) { *out = make([]GitRepositoryInclude, len(*in)) copy(*out, *in) } + if in.SparseCheckout != nil { + in, out := &in.SparseCheckout, &out.SparseCheckout + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitRepositorySpec. 
@@ -395,6 +400,11 @@ func (in *GitRepositoryStatus) DeepCopyInto(out *GitRepositoryStatus) { *out = make([]GitRepositoryInclude, len(*in)) copy(*out, *in) } + if in.ObservedSparseCheckout != nil { + in, out := &in.ObservedSparseCheckout, &out.ObservedSparseCheckout + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.SourceVerificationMode != nil { in, out := &in.SourceVerificationMode, &out.SourceVerificationMode *out = new(GitVerificationMode) diff --git a/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml b/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml index 0e37a7b4..10cf1162 100644 --- a/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml +++ b/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml @@ -174,6 +174,14 @@ spec: required: - name type: object + sparseCheckout: + description: |- + SparseCheckout specifies a list of directories to checkout when cloning + the repository. If specified, only these directories are included in the + Artifact produced for this GitRepository. + items: + type: string + type: array suspend: description: |- Suspend tells the controller to suspend the reconciliation of this @@ -443,6 +451,13 @@ spec: ObservedRecurseSubmodules is the observed resource submodules configuration used to produce the current Artifact. type: boolean + observedSparseCheckout: + description: |- + ObservedSparseCheckout is the observed list of directories used to + produce the current Artifact. + items: + type: string + type: array sourceVerificationMode: description: |- SourceVerificationMode is the last used verification mode indicating diff --git a/docs/api/v1/source.md b/docs/api/v1/source.md index 121a056c..df1b800c 100644 --- a/docs/api/v1/source.md +++ b/docs/api/v1/source.md @@ -523,6 +523,20 @@ the GitRepository as cloned from the URL, using their default settings.

should be included in the Artifact produced for this GitRepository.

+ + +sparseCheckout
+ +[]string + + + +(Optional) +

SparseCheckout specifies a list of directories to checkout when cloning +the repository. If specified, only these directories are included in the +Artifact produced for this GitRepository.

+ + @@ -1863,6 +1877,20 @@ the GitRepository as cloned from the URL, using their default settings.

should be included in the Artifact produced for this GitRepository.

+ + +sparseCheckout
+ +[]string + + + +(Optional) +

SparseCheckout specifies a list of directories to checkout when cloning +the repository. If specified, only these directories are included in the +Artifact produced for this GitRepository.

+ + @@ -1983,6 +2011,19 @@ produce the current Artifact.

+observedSparseCheckout
+ +[]string + + + +(Optional) +

ObservedSparseCheckout is the observed list of directories used to +produce the current Artifact.

+ + + + sourceVerificationMode
diff --git a/docs/spec/v1/gitrepositories.md b/docs/spec/v1/gitrepositories.md index a9c5d2a2..b57e2b9d 100644 --- a/docs/spec/v1/gitrepositories.md +++ b/docs/spec/v1/gitrepositories.md @@ -615,6 +615,28 @@ list](#default-exclusions), and may overrule the [`.sourceignore` file exclusions](#sourceignore-file). See [excluding files](#excluding-files) for more information. +### Sparse checkout + +`.spec.sparseCheckout` is an optional field to specify a list of directories to +checkout when cloning the repository. If specified, only the specified directory +contents will be present in the artifact produced for this repository. + +```yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: GitRepository +metadata: + name: podinfo + namespace: default +spec: + interval: 5m + url: https://github.com/stefanprodan/podinfo + ref: + branch: master + sparseCheckout: + - charts + - kustomize +``` + ### Suspend `.spec.suspend` is an optional field to suspend the reconciliation of a @@ -1157,6 +1179,27 @@ status: ... ``` +### Observed Sparse Checkout + +The source-controller reports observed sparse checkout in the GitRepository's +`.status.observedSparseCheckout`. The observed sparse checkout is the latest +`.spec.sparseCheckout` value which resulted in a [ready +state](#ready-gitrepository), or stalled due to an error it cannot recover from +without human intervention. The value is the same as the [sparseCheckout in +spec](#sparse-checkout). It indicates the sparse checkout configuration used in +building the current artifact in storage. It is also used by the controller to +determine if an artifact needs to be rebuilt. + +Example: +```yaml +status: + ... + observedSparseCheckout: + - charts + - kustomize + ... 
+``` + ### Source Verification Mode The source-controller reports the Git object(s) it verified in the Git diff --git a/internal/controller/gitrepository_controller.go b/internal/controller/gitrepository_controller.go index 6b68af55..d5361be3 100644 --- a/internal/controller/gitrepository_controller.go +++ b/internal/controller/gitrepository_controller.go @@ -590,6 +590,16 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, sp *patch ctrl.LoggerFrom(ctx).V(logger.DebugLevel).Info("git repository checked out", "url", obj.Spec.URL, "revision", commitReference(obj, commit)) conditions.Delete(obj, sourcev1.FetchFailedCondition) + // Validate sparse checkout paths after successful checkout. + if err := r.validateSparseCheckoutPaths(ctx, obj, dir); err != nil { + e := serror.NewStalling( + fmt.Errorf("failed to sparse checkout directories : %w", err), + sourcev1.GitOperationFailedReason, + ) + conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, e.Reason, "%s", e) + return sreconcile.ResultEmpty, e + } + // Verify commit signature if result, err := r.verifySignature(ctx, obj, *commit); err != nil || result == sreconcile.ResultEmpty { return result, err @@ -812,6 +822,7 @@ func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, sp *pat obj.Status.ObservedIgnore = obj.Spec.Ignore obj.Status.ObservedRecurseSubmodules = obj.Spec.RecurseSubmodules obj.Status.ObservedInclude = obj.Spec.Include + obj.Status.ObservedSparseCheckout = obj.Spec.SparseCheckout // Remove the deprecated symlink. // TODO(hidde): remove 2 minor versions from introduction of v1. @@ -884,6 +895,7 @@ func (r *GitRepositoryReconciler) reconcileInclude(ctx context.Context, sp *patc // performs a git checkout. func (r *GitRepositoryReconciler) gitCheckout(ctx context.Context, obj *sourcev1.GitRepository, authOpts *git.AuthOptions, proxyOpts *transport.ProxyOptions, dir string, optimized bool) (*git.Commit, error) { + // Configure checkout strategy. 
cloneOpts := repository.CloneConfig{ RecurseSubmodules: obj.Spec.RecurseSubmodules, @@ -896,7 +908,14 @@ func (r *GitRepositoryReconciler) gitCheckout(ctx context.Context, obj *sourcev1 cloneOpts.SemVer = ref.SemVer cloneOpts.RefName = ref.Name } - + if obj.Spec.SparseCheckout != nil { + // Trim any leading "./" in the directory paths since underlying go-git API does not honor them. + sparseCheckoutDirs := make([]string, len(obj.Spec.SparseCheckout)) + for i, path := range obj.Spec.SparseCheckout { + sparseCheckoutDirs[i] = strings.TrimPrefix(path, "./") + } + cloneOpts.SparseCheckoutDirectories = sparseCheckoutDirs + } // Only if the object has an existing artifact in storage, attempt to // short-circuit clone operation. reconcileStorage has already verified // that the artifact exists. @@ -1172,6 +1191,14 @@ func gitContentConfigChanged(obj *sourcev1.GitRepository, includes *artifactSet) if requiresVerification(obj) { return true } + if len(obj.Spec.SparseCheckout) != len(obj.Status.ObservedSparseCheckout) { + return true + } + for index, dir := range obj.Spec.SparseCheckout { + if dir != obj.Status.ObservedSparseCheckout[index] { + return true + } + } // Convert artifactSet to index addressable artifacts and ensure that it and // the included artifacts include all the include from the spec. @@ -1206,6 +1233,19 @@ func gitContentConfigChanged(obj *sourcev1.GitRepository, includes *artifactSet) return false } +// validateSparseCheckoutPaths checks if the sparse checkout paths exist in the cloned repository. 
+func (r *GitRepositoryReconciler) validateSparseCheckoutPaths(ctx context.Context, obj *sourcev1.GitRepository, dir string) error { + if obj.Spec.SparseCheckout != nil { + for _, path := range obj.Spec.SparseCheckout { + fullPath := filepath.Join(dir, path) + if _, err := os.Lstat(fullPath); err != nil { + return fmt.Errorf("sparse checkout dir '%s' does not exist in repository: %w", path, err) + } + } + } + return nil +} + // Returns true if both GitRepositoryIncludes are equal. func gitRepositoryIncludeEqual(a, b sourcev1.GitRepositoryInclude) bool { if a.GitRepositoryRef != b.GitRepositoryRef { diff --git a/internal/controller/gitrepository_controller_test.go b/internal/controller/gitrepository_controller_test.go index 5eb4713f..5dca5b16 100644 --- a/internal/controller/gitrepository_controller_test.go +++ b/internal/controller/gitrepository_controller_test.go @@ -3130,6 +3130,38 @@ func TestGitContentConfigChanged(t *testing.T) { }, want: false, }, + { + name: "unobserved sparse checkout", + obj: sourcev1.GitRepository{ + Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/z"}}, + Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c"}}, + }, + want: true, + }, + { + name: "unobserved case sensitive sparse checkout", + obj: sourcev1.GitRepository{ + Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/Z"}}, + Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c", "x/y/z"}}, + }, + want: true, + }, + { + name: "observed sparse checkout", + obj: sourcev1.GitRepository{ + Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/z"}}, + Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c", "x/y/z"}}, + }, + want: false, + }, + { + name: "observed sparse checkout with leading slash", + obj: sourcev1.GitRepository{ + Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"./a/b/c", "./x/y/z"}}, + Status: 
sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"./a/b/c", "./x/y/z"}}, + }, + want: false, + }, { name: "unobserved include", obj: sourcev1.GitRepository{