From 61f9eb28fcaa9361d07b6ff6a5a304b9ace96beb Mon Sep 17 00:00:00 2001
From: Dipti Pai
Date: Thu, 10 Apr 2025 15:05:31 -0700
Subject: [PATCH] Sparse Checkout Directories in GitRepositories.
- Add `.spec.sparseCheckout` and `.status.observedSparseCheckout` fields to `GitRepository`.
- Add controller support to send the sparse checkout directories to go-git via pkg methods.
- Use `.status.observedSparseCheckout` to detect drift in configuration.
- Trim leading "./" in directory paths.
- Validate the spec configuration by checking that the directories specified in the spec exist in the cloned repository after a successful checkout.
- Add tests for testing the observed sparse checkout behavior.
- Add docs describing the new fields.
Signed-off-by: Dipti Pai
---
api/v1/gitrepository_types.go | 11 +++++
api/v1/zz_generated.deepcopy.go | 10 +++++
...rce.toolkit.fluxcd.io_gitrepositories.yaml | 15 +++++++
docs/api/v1/source.md | 41 ++++++++++++++++++
docs/spec/v1/gitrepositories.md | 43 +++++++++++++++++++
.../controller/gitrepository_controller.go | 42 +++++++++++++++++-
.../gitrepository_controller_test.go | 32 ++++++++++++++
7 files changed, 193 insertions(+), 1 deletion(-)
diff --git a/api/v1/gitrepository_types.go b/api/v1/gitrepository_types.go
index 20ef37d0..590f1a38 100644
--- a/api/v1/gitrepository_types.go
+++ b/api/v1/gitrepository_types.go
@@ -148,6 +148,12 @@ type GitRepositorySpec struct {
// should be included in the Artifact produced for this GitRepository.
// +optional
Include []GitRepositoryInclude `json:"include,omitempty"`
+
+ // SparseCheckout specifies a list of directories to checkout when cloning
+ // the repository. If specified, only these directories are included in the
+ // Artifact produced for this GitRepository.
+ // +optional
+ SparseCheckout []string `json:"sparseCheckout,omitempty"`
}
// GitRepositoryInclude specifies a local reference to a GitRepository which
@@ -266,6 +272,11 @@ type GitRepositoryStatus struct {
// +optional
ObservedInclude []GitRepositoryInclude `json:"observedInclude,omitempty"`
+ // ObservedSparseCheckout is the observed list of directories used to
+ // produce the current Artifact.
+ // +optional
+ ObservedSparseCheckout []string `json:"observedSparseCheckout,omitempty"`
+
// SourceVerificationMode is the last used verification mode indicating
// which Git object(s) have been verified.
// +optional
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
index 12e537fa..9ac5d593 100644
--- a/api/v1/zz_generated.deepcopy.go
+++ b/api/v1/zz_generated.deepcopy.go
@@ -347,6 +347,11 @@ func (in *GitRepositorySpec) DeepCopyInto(out *GitRepositorySpec) {
*out = make([]GitRepositoryInclude, len(*in))
copy(*out, *in)
}
+ if in.SparseCheckout != nil {
+ in, out := &in.SparseCheckout, &out.SparseCheckout
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitRepositorySpec.
@@ -395,6 +400,11 @@ func (in *GitRepositoryStatus) DeepCopyInto(out *GitRepositoryStatus) {
*out = make([]GitRepositoryInclude, len(*in))
copy(*out, *in)
}
+ if in.ObservedSparseCheckout != nil {
+ in, out := &in.ObservedSparseCheckout, &out.ObservedSparseCheckout
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
if in.SourceVerificationMode != nil {
in, out := &in.SourceVerificationMode, &out.SourceVerificationMode
*out = new(GitVerificationMode)
diff --git a/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml b/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml
index 0e37a7b4..10cf1162 100644
--- a/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml
+++ b/config/crd/bases/source.toolkit.fluxcd.io_gitrepositories.yaml
@@ -174,6 +174,14 @@ spec:
required:
- name
type: object
+ sparseCheckout:
+ description: |-
+ SparseCheckout specifies a list of directories to checkout when cloning
+ the repository. If specified, only these directories are included in the
+ Artifact produced for this GitRepository.
+ items:
+ type: string
+ type: array
suspend:
description: |-
Suspend tells the controller to suspend the reconciliation of this
@@ -443,6 +451,13 @@ spec:
ObservedRecurseSubmodules is the observed resource submodules
configuration used to produce the current Artifact.
type: boolean
+ observedSparseCheckout:
+ description: |-
+ ObservedSparseCheckout is the observed list of directories used to
+ produce the current Artifact.
+ items:
+ type: string
+ type: array
sourceVerificationMode:
description: |-
SourceVerificationMode is the last used verification mode indicating
diff --git a/docs/api/v1/source.md b/docs/api/v1/source.md
index 121a056c..df1b800c 100644
--- a/docs/api/v1/source.md
+++ b/docs/api/v1/source.md
@@ -523,6 +523,20 @@ the GitRepository as cloned from the URL, using their default settings.
should be included in the Artifact produced for this GitRepository.
+
+
+sparseCheckout
+
+[]string
+
+ |
+
+(Optional)
+ SparseCheckout specifies a list of directories to checkout when cloning
+the repository. If specified, only these directories are included in the
+Artifact produced for this GitRepository.
+ |
+
@@ -1863,6 +1877,20 @@ the GitRepository as cloned from the URL, using their default settings.
should be included in the Artifact produced for this GitRepository.
+
+
+sparseCheckout
+
+[]string
+
+ |
+
+(Optional)
+ SparseCheckout specifies a list of directories to checkout when cloning
+the repository. If specified, only these directories are included in the
+Artifact produced for this GitRepository.
+ |
+
@@ -1983,6 +2011,19 @@ produce the current Artifact.
+observedSparseCheckout
+
+[]string
+
+ |
+
+(Optional)
+ ObservedSparseCheckout is the observed list of directories used to
+produce the current Artifact.
+ |
+
+
+
sourceVerificationMode
diff --git a/docs/spec/v1/gitrepositories.md b/docs/spec/v1/gitrepositories.md
index a9c5d2a2..b57e2b9d 100644
--- a/docs/spec/v1/gitrepositories.md
+++ b/docs/spec/v1/gitrepositories.md
@@ -615,6 +615,28 @@ list](#default-exclusions), and may overrule the [`.sourceignore` file
exclusions](#sourceignore-file). See [excluding files](#excluding-files)
for more information.
+### Sparse checkout
+
+`.spec.sparseCheckout` is an optional field to specify a list of directories
+to check out when cloning the repository. If specified, only the contents of
+these directories will be present in the artifact produced for this repository.
+
+```yaml
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: GitRepository
+metadata:
+ name: podinfo
+ namespace: default
+spec:
+ interval: 5m
+ url: https://github.com/stefanprodan/podinfo
+ ref:
+ branch: master
+ sparseCheckout:
+ - charts
+ - kustomize
+```
+
### Suspend
`.spec.suspend` is an optional field to suspend the reconciliation of a
@@ -1157,6 +1179,27 @@ status:
...
```
+### Observed Sparse Checkout
+
+The source-controller reports observed sparse checkout in the GitRepository's
+`.status.observedSparseCheckout`. The observed sparse checkout is the latest
+`.spec.sparseCheckout` value which resulted in a [ready
+state](#ready-gitrepository), or stalled due to error it can not recover from
+without human intervention. The value is the same as the [sparseCheckout in
+spec](#sparse-checkout). It indicates the sparse checkout configuration used in
+building the current artifact in storage. It is also used by the controller to
+determine if an artifact needs to be rebuilt.
+
+Example:
+```yaml
+status:
+ ...
+ observedSparseCheckout:
+ - charts
+ - kustomize
+ ...
+```
+
### Source Verification Mode
The source-controller reports the Git object(s) it verified in the Git
diff --git a/internal/controller/gitrepository_controller.go b/internal/controller/gitrepository_controller.go
index 6b68af55..d5361be3 100644
--- a/internal/controller/gitrepository_controller.go
+++ b/internal/controller/gitrepository_controller.go
@@ -590,6 +590,16 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, sp *patch
ctrl.LoggerFrom(ctx).V(logger.DebugLevel).Info("git repository checked out", "url", obj.Spec.URL, "revision", commitReference(obj, commit))
conditions.Delete(obj, sourcev1.FetchFailedCondition)
+ // Validate sparse checkout paths after successful checkout.
+ if err := r.validateSparseCheckoutPaths(ctx, obj, dir); err != nil {
+ e := serror.NewStalling(
+ fmt.Errorf("failed to sparse checkout directories : %w", err),
+ sourcev1.GitOperationFailedReason,
+ )
+ conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, e.Reason, "%s", e)
+ return sreconcile.ResultEmpty, e
+ }
+
// Verify commit signature
if result, err := r.verifySignature(ctx, obj, *commit); err != nil || result == sreconcile.ResultEmpty {
return result, err
@@ -812,6 +822,7 @@ func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, sp *pat
obj.Status.ObservedIgnore = obj.Spec.Ignore
obj.Status.ObservedRecurseSubmodules = obj.Spec.RecurseSubmodules
obj.Status.ObservedInclude = obj.Spec.Include
+ obj.Status.ObservedSparseCheckout = obj.Spec.SparseCheckout
// Remove the deprecated symlink.
// TODO(hidde): remove 2 minor versions from introduction of v1.
@@ -884,6 +895,7 @@ func (r *GitRepositoryReconciler) reconcileInclude(ctx context.Context, sp *patc
// performs a git checkout.
func (r *GitRepositoryReconciler) gitCheckout(ctx context.Context, obj *sourcev1.GitRepository,
authOpts *git.AuthOptions, proxyOpts *transport.ProxyOptions, dir string, optimized bool) (*git.Commit, error) {
+
// Configure checkout strategy.
cloneOpts := repository.CloneConfig{
RecurseSubmodules: obj.Spec.RecurseSubmodules,
@@ -896,7 +908,14 @@ func (r *GitRepositoryReconciler) gitCheckout(ctx context.Context, obj *sourcev1
cloneOpts.SemVer = ref.SemVer
cloneOpts.RefName = ref.Name
}
-
+ if obj.Spec.SparseCheckout != nil {
+ // Trim any leading "./" in the directory paths since underlying go-git API does not honor them.
+ sparseCheckoutDirs := make([]string, len(obj.Spec.SparseCheckout))
+ for i, path := range obj.Spec.SparseCheckout {
+ sparseCheckoutDirs[i] = strings.TrimPrefix(path, "./")
+ }
+ cloneOpts.SparseCheckoutDirectories = sparseCheckoutDirs
+ }
// Only if the object has an existing artifact in storage, attempt to
// short-circuit clone operation. reconcileStorage has already verified
// that the artifact exists.
@@ -1172,6 +1191,14 @@ func gitContentConfigChanged(obj *sourcev1.GitRepository, includes *artifactSet)
if requiresVerification(obj) {
return true
}
+ if len(obj.Spec.SparseCheckout) != len(obj.Status.ObservedSparseCheckout) {
+ return true
+ }
+ for index, dir := range obj.Spec.SparseCheckout {
+ if dir != obj.Status.ObservedSparseCheckout[index] {
+ return true
+ }
+ }
// Convert artifactSet to index addressable artifacts and ensure that it and
// the included artifacts include all the include from the spec.
@@ -1206,6 +1233,19 @@ func gitContentConfigChanged(obj *sourcev1.GitRepository, includes *artifactSet)
return false
}
+// validateSparseCheckoutPaths reports an error for the first entry of
+// .spec.sparseCheckout that cannot be stat'ed beneath dir, and nil otherwise.
+func (r *GitRepositoryReconciler) validateSparseCheckoutPaths(ctx context.Context, obj *sourcev1.GitRepository, dir string) error {
+	// No nil guard is needed: ranging over a nil slice performs zero iterations.
+	for _, sparseDir := range obj.Spec.SparseCheckout {
+		// Lstat inspects the entry itself and does not follow symlinks.
+		if _, err := os.Lstat(filepath.Join(dir, sparseDir)); err != nil {
+			return fmt.Errorf("sparse checkout dir '%s' does not exist in repository: %w", sparseDir, err)
+		}
+	}
+	return nil
+}
+
// Returns true if both GitRepositoryIncludes are equal.
func gitRepositoryIncludeEqual(a, b sourcev1.GitRepositoryInclude) bool {
if a.GitRepositoryRef != b.GitRepositoryRef {
diff --git a/internal/controller/gitrepository_controller_test.go b/internal/controller/gitrepository_controller_test.go
index 5eb4713f..5dca5b16 100644
--- a/internal/controller/gitrepository_controller_test.go
+++ b/internal/controller/gitrepository_controller_test.go
@@ -3130,6 +3130,38 @@ func TestGitContentConfigChanged(t *testing.T) {
},
want: false,
},
+ {
+ name: "unobserved sparse checkout",
+ obj: sourcev1.GitRepository{
+ Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/z"}},
+ Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c"}},
+ },
+ want: true,
+ },
+ {
+ name: "unobserved case sensitive sparse checkout",
+ obj: sourcev1.GitRepository{
+ Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/Z"}},
+ Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c", "x/y/z"}},
+ },
+ want: true,
+ },
+ {
+ name: "observed sparse checkout",
+ obj: sourcev1.GitRepository{
+ Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/z"}},
+ Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c", "x/y/z"}},
+ },
+ want: false,
+ },
+ {
+ name: "observed sparse checkout with leading slash",
+ obj: sourcev1.GitRepository{
+ Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"./a/b/c", "./x/y/z"}},
+ Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"./a/b/c", "./x/y/z"}},
+ },
+ want: false,
+ },
{
name: "unobserved include",
obj: sourcev1.GitRepository{
|