Sparse Checkout Directories in GitRepositories.

- Add `.spec.sparseCheckout` and `.status.observedSparseCheckout` fields to `GitRepository`.
    - Add controller support to send the sparse checkout directories to go-git via pkg methods.
    - Use `.status.observedSparseCheckout` to detect drift in configuration.
    - Trim leading "./" in directory paths.
    - Validate spec configuration by checking that the directories specified in spec exist in the cloned repository after a successful checkout.
    - Add tests for testing the observed sparse checkout behavior.
    - Add docs describing the new fields.

Signed-off-by: Dipti Pai <diptipai89@outlook.com>
This commit is contained in:
Dipti Pai 2025-04-10 15:05:31 -07:00 committed by dipti-pai
parent 034bc4962d
commit 61f9eb28fc
7 changed files with 193 additions and 1 deletions

View File

@ -148,6 +148,12 @@ type GitRepositorySpec struct {
// should be included in the Artifact produced for this GitRepository.
// +optional
Include []GitRepositoryInclude `json:"include,omitempty"`
// SparseCheckout specifies a list of directories to checkout when cloning
// the repository. If specified, only these directories are included in the
// Artifact produced for this GitRepository.
// +optional
SparseCheckout []string `json:"sparseCheckout,omitempty"`
}
// GitRepositoryInclude specifies a local reference to a GitRepository which
@ -266,6 +272,11 @@ type GitRepositoryStatus struct {
// +optional
ObservedInclude []GitRepositoryInclude `json:"observedInclude,omitempty"`
// ObservedSparseCheckout is the observed list of directories used to
// produce the current Artifact.
// +optional
ObservedSparseCheckout []string `json:"observedSparseCheckout,omitempty"`
// SourceVerificationMode is the last used verification mode indicating
// which Git object(s) have been verified.
// +optional

View File

@ -347,6 +347,11 @@ func (in *GitRepositorySpec) DeepCopyInto(out *GitRepositorySpec) {
*out = make([]GitRepositoryInclude, len(*in))
copy(*out, *in)
}
if in.SparseCheckout != nil {
in, out := &in.SparseCheckout, &out.SparseCheckout
*out = make([]string, len(*in))
copy(*out, *in)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitRepositorySpec.
@ -395,6 +400,11 @@ func (in *GitRepositoryStatus) DeepCopyInto(out *GitRepositoryStatus) {
*out = make([]GitRepositoryInclude, len(*in))
copy(*out, *in)
}
if in.ObservedSparseCheckout != nil {
in, out := &in.ObservedSparseCheckout, &out.ObservedSparseCheckout
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.SourceVerificationMode != nil {
in, out := &in.SourceVerificationMode, &out.SourceVerificationMode
*out = new(GitVerificationMode)

View File

@ -174,6 +174,14 @@ spec:
required:
- name
type: object
sparseCheckout:
description: |-
SparseCheckout specifies a list of directories to checkout when cloning
the repository. If specified, only these directories are included in the
Artifact produced for this GitRepository.
items:
type: string
type: array
suspend:
description: |-
Suspend tells the controller to suspend the reconciliation of this
@ -443,6 +451,13 @@ spec:
ObservedRecurseSubmodules is the observed resource submodules
configuration used to produce the current Artifact.
type: boolean
observedSparseCheckout:
description: |-
ObservedSparseCheckout is the observed list of directories used to
produce the current Artifact.
items:
type: string
type: array
sourceVerificationMode:
description: |-
SourceVerificationMode is the last used verification mode indicating

View File

@ -523,6 +523,20 @@ the GitRepository as cloned from the URL, using their default settings.</p>
should be included in the Artifact produced for this GitRepository.</p>
</td>
</tr>
<tr>
<td>
<code>sparseCheckout</code><br>
<em>
[]string
</em>
</td>
<td>
<em>(Optional)</em>
<p>SparseCheckout specifies a list of directories to checkout when cloning
the repository. If specified, only these directories are included in the
Artifact produced for this GitRepository.</p>
</td>
</tr>
</table>
</td>
</tr>
@ -1863,6 +1877,20 @@ the GitRepository as cloned from the URL, using their default settings.</p>
should be included in the Artifact produced for this GitRepository.</p>
</td>
</tr>
<tr>
<td>
<code>sparseCheckout</code><br>
<em>
[]string
</em>
</td>
<td>
<em>(Optional)</em>
<p>SparseCheckout specifies a list of directories to checkout when cloning
the repository. If specified, only these directories are included in the
Artifact produced for this GitRepository.</p>
</td>
</tr>
</tbody>
</table>
</div>
@ -1983,6 +2011,19 @@ produce the current Artifact.</p>
</tr>
<tr>
<td>
<code>observedSparseCheckout</code><br>
<em>
[]string
</em>
</td>
<td>
<em>(Optional)</em>
<p>ObservedSparseCheckout is the observed list of directories used to
produce the current Artifact.</p>
</td>
</tr>
<tr>
<td>
<code>sourceVerificationMode</code><br>
<em>
<a href="#source.toolkit.fluxcd.io/v1.GitVerificationMode">

View File

@ -615,6 +615,28 @@ list](#default-exclusions), and may overrule the [`.sourceignore` file
exclusions](#sourceignore-file). See [excluding files](#excluding-files)
for more information.
### Sparse checkout
`.spec.sparseCheckout` is an optional field to specify a list of directories to
check out when cloning the repository. If specified, only the contents of these
directories will be present in the artifact produced for this repository.
```yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: GitRepository
metadata:
name: podinfo
namespace: default
spec:
interval: 5m
url: https://github.com/stefanprodan/podinfo
ref:
branch: master
sparseCheckout:
- charts
- kustomize
```
### Suspend
`.spec.suspend` is an optional field to suspend the reconciliation of a
@ -1157,6 +1179,27 @@ status:
...
```
### Observed Sparse Checkout
The source-controller reports the observed sparse checkout in the GitRepository's
`.status.observedSparseCheckout`. The observed sparse checkout is the latest
`.spec.sparseCheckout` value which resulted in a [ready
state](#ready-gitrepository), or stalled due to an error it cannot recover from
without human intervention. The value is the same as the [sparseCheckout in
spec](#sparse-checkout). It indicates the sparse checkout configuration used in
building the current artifact in storage. It is also used by the controller to
determine if an artifact needs to be rebuilt.
Example:
```yaml
status:
...
observedSparseCheckout:
- charts
- kustomize
...
```
### Source Verification Mode
The source-controller reports the Git object(s) it verified in the Git

View File

@ -590,6 +590,16 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, sp *patch
ctrl.LoggerFrom(ctx).V(logger.DebugLevel).Info("git repository checked out", "url", obj.Spec.URL, "revision", commitReference(obj, commit))
conditions.Delete(obj, sourcev1.FetchFailedCondition)
// Validate sparse checkout paths after successful checkout.
if err := r.validateSparseCheckoutPaths(ctx, obj, dir); err != nil {
e := serror.NewStalling(
fmt.Errorf("failed to sparse checkout directories : %w", err),
sourcev1.GitOperationFailedReason,
)
conditions.MarkTrue(obj, sourcev1.FetchFailedCondition, e.Reason, "%s", e)
return sreconcile.ResultEmpty, e
}
// Verify commit signature
if result, err := r.verifySignature(ctx, obj, *commit); err != nil || result == sreconcile.ResultEmpty {
return result, err
@ -812,6 +822,7 @@ func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, sp *pat
obj.Status.ObservedIgnore = obj.Spec.Ignore
obj.Status.ObservedRecurseSubmodules = obj.Spec.RecurseSubmodules
obj.Status.ObservedInclude = obj.Spec.Include
obj.Status.ObservedSparseCheckout = obj.Spec.SparseCheckout
// Remove the deprecated symlink.
// TODO(hidde): remove 2 minor versions from introduction of v1.
@ -884,6 +895,7 @@ func (r *GitRepositoryReconciler) reconcileInclude(ctx context.Context, sp *patc
// performs a git checkout.
func (r *GitRepositoryReconciler) gitCheckout(ctx context.Context, obj *sourcev1.GitRepository,
authOpts *git.AuthOptions, proxyOpts *transport.ProxyOptions, dir string, optimized bool) (*git.Commit, error) {
// Configure checkout strategy.
cloneOpts := repository.CloneConfig{
RecurseSubmodules: obj.Spec.RecurseSubmodules,
@ -896,7 +908,14 @@ func (r *GitRepositoryReconciler) gitCheckout(ctx context.Context, obj *sourcev1
cloneOpts.SemVer = ref.SemVer
cloneOpts.RefName = ref.Name
}
if obj.Spec.SparseCheckout != nil {
// Trim any leading "./" in the directory paths since underlying go-git API does not honor them.
sparseCheckoutDirs := make([]string, len(obj.Spec.SparseCheckout))
for i, path := range obj.Spec.SparseCheckout {
sparseCheckoutDirs[i] = strings.TrimPrefix(path, "./")
}
cloneOpts.SparseCheckoutDirectories = sparseCheckoutDirs
}
// Only if the object has an existing artifact in storage, attempt to
// short-circuit clone operation. reconcileStorage has already verified
// that the artifact exists.
@ -1172,6 +1191,14 @@ func gitContentConfigChanged(obj *sourcev1.GitRepository, includes *artifactSet)
if requiresVerification(obj) {
return true
}
if len(obj.Spec.SparseCheckout) != len(obj.Status.ObservedSparseCheckout) {
return true
}
for index, dir := range obj.Spec.SparseCheckout {
if dir != obj.Status.ObservedSparseCheckout[index] {
return true
}
}
// Convert artifactSet to index addressable artifacts and ensure that it and
// the included artifacts include all the include from the spec.
@ -1206,6 +1233,19 @@ func gitContentConfigChanged(obj *sourcev1.GitRepository, includes *artifactSet)
return false
}
// validateSparseCheckoutPaths confirms that every entry listed in
// obj.Spec.SparseCheckout is present on disk beneath dir, the directory the
// repository was checked out into.
//
// Each entry is joined onto dir and probed with os.Lstat, so a symbolic link
// at that location is accepted without being followed. The first entry that
// cannot be stat'ed aborts the validation and is reported in the returned
// error, which wraps the underlying os.Lstat failure. A nil error is returned
// when all entries are found, or when no sparse checkout entries are
// configured at all.
func (r *GitRepositoryReconciler) validateSparseCheckoutPaths(ctx context.Context, obj *sourcev1.GitRepository, dir string) error {
	// Ranging over a nil or empty list is a no-op, so an unset field trivially
	// passes validation.
	for _, entry := range obj.Spec.SparseCheckout {
		_, statErr := os.Lstat(filepath.Join(dir, entry))
		if statErr == nil {
			continue
		}
		return fmt.Errorf("sparse checkout dir '%s' does not exist in repository: %w", entry, statErr)
	}
	return nil
}
// Returns true if both GitRepositoryIncludes are equal.
func gitRepositoryIncludeEqual(a, b sourcev1.GitRepositoryInclude) bool {
if a.GitRepositoryRef != b.GitRepositoryRef {

View File

@ -3130,6 +3130,38 @@ func TestGitContentConfigChanged(t *testing.T) {
},
want: false,
},
{
name: "unobserved sparse checkout",
obj: sourcev1.GitRepository{
Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/z"}},
Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c"}},
},
want: true,
},
{
name: "unobserved case sensitive sparse checkout",
obj: sourcev1.GitRepository{
Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/Z"}},
Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c", "x/y/z"}},
},
want: true,
},
{
name: "observed sparse checkout",
obj: sourcev1.GitRepository{
Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"a/b/c", "x/y/z"}},
Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"a/b/c", "x/y/z"}},
},
want: false,
},
{
name: "observed sparse checkout with leading slash",
obj: sourcev1.GitRepository{
Spec: sourcev1.GitRepositorySpec{SparseCheckout: []string{"./a/b/c", "./x/y/z"}},
Status: sourcev1.GitRepositoryStatus{ObservedSparseCheckout: []string{"./a/b/c", "./x/y/z"}},
},
want: false,
},
{
name: "unobserved include",
obj: sourcev1.GitRepository{