From 860d7051dadcee69b7f3f55df7bbb77430c7a8ae Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Tue, 26 Apr 2022 14:22:41 +0100 Subject: [PATCH] libgit2: optimise checkout branch clones No-op reconciliations are very inefficient, as they carry out a full clone operation of the target repository even when no changes have taken place. This change will execute a remote-ls operation, and cancel the clone operation if the remote tip commit is still the same as the one observed on the last reconcilation. In such cases, an git.NoChangesError is returned. Signed-off-by: Paulo Gomes --- controllers/gitrepository_controller.go | 22 ++++++- pkg/git/git.go | 12 ++++ pkg/git/libgit2/checkout.go | 85 ++++++++++++++++++++++--- pkg/git/libgit2/checkout_test.go | 19 +++++- pkg/git/options.go | 5 ++ 5 files changed, 130 insertions(+), 13 deletions(-) diff --git a/controllers/gitrepository_controller.go b/controllers/gitrepository_controller.go index e19ffb49..a6cbc16d 100644 --- a/controllers/gitrepository_controller.go +++ b/controllers/gitrepository_controller.go @@ -311,8 +311,9 @@ func (r *GitRepositoryReconciler) notify(oldObj, newObj *sourcev1.GitRepository, // reconcileStorage ensures the current state of the storage matches the // desired and previously observed state. // -// All Artifacts for the object except for the current one in the Status are -// garbage collected from the Storage. +// The garbage collection is executed based on the flag based settings and +// may remove files that are beyond their TTL or the maximum number of files +// to survive a collection cycle. // If the Artifact in the Status of the object disappeared from the Storage, // it is removed from the object. // If the object does not have an Artifact in its Status, a Reconciling @@ -411,6 +412,11 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, checkoutOpts.Tag = ref.Tag checkoutOpts.SemVer = ref.SemVer } + + if artifact := obj.GetArtifact(); artifact != nil { + checkoutOpts.LastRevision = artifact.Revision + } + checkoutStrategy, err := strategy.CheckoutStrategyForImplementation(ctx, git.Implementation(obj.Spec.GitImplementation), checkoutOpts) if err != nil { @@ -455,6 +461,11 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, defer cancel() c, err := checkoutStrategy.Checkout(gitCtx, dir, repositoryURL, authOpts) if err != nil { + var v git.NoChangesError + if errors.As(err, &v) { + return sreconcile.ResultSuccess, nil + } + e := &serror.Event{ Err: fmt.Errorf("failed to checkout and determine revision: %w", err), Reason: sourcev1.GitOperationFailedReason, @@ -495,6 +506,13 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context, // object are set, and the symlink in the Storage is updated to its path. func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context, obj *sourcev1.GitRepository, commit *git.Commit, includes *artifactSet, dir string) (sreconcile.Result, error) { + // If reconciliation resulted in git.NoChangesError, + // avoid reconciling artifact, as this was already done + // on a previous reconciliation. + if commit == nil || commit.Hash.String() == "" { + return sreconcile.ResultSuccess, nil + } + // Create potential new artifact with current available metadata artifact := r.Storage.NewArtifactFor(obj.Kind, obj.GetObjectMeta(), commit.String(), fmt.Sprintf("%s.tar.gz", commit.Hash.String())) diff --git a/pkg/git/git.go b/pkg/git/git.go index b939e893..cc45498d 100644 --- a/pkg/git/git.go +++ b/pkg/git/git.go @@ -106,3 +106,15 @@ func (c *Commit) ShortMessage() string { type CheckoutStrategy interface { Checkout(ctx context.Context, path, url string, config *AuthOptions) (*Commit, error) } + +// NoChangesError represents the case in which a Git clone operation +// is attempted, but cancelled as the revision is still the same as +// the one observed on the last successful reconciliation. +type NoChangesError struct { + Message string + ObservedRevision string +} + +func (e NoChangesError) Error() string { + return fmt.Sprintf("%s: observed revision '%s'", e.Message, e.ObservedRevision) +} diff --git a/pkg/git/libgit2/checkout.go b/pkg/git/libgit2/checkout.go index 046019df..3df8d7f7 100644 --- a/pkg/git/libgit2/checkout.go +++ b/pkg/git/libgit2/checkout.go @@ -52,40 +52,105 @@ func CheckoutStrategyForOptions(ctx context.Context, opt git.CheckoutOptions) gi if branch == "" { branch = git.DefaultBranch } - return &CheckoutBranch{Branch: branch} + return &CheckoutBranch{ + Branch: branch, + LastRevision: opt.LastRevision, + } } } type CheckoutBranch struct { - Branch string + Branch string + LastRevision string } func (c *CheckoutBranch) Checkout(ctx context.Context, path, url string, opts *git.AuthOptions) (*git.Commit, error) { - repo, err := safeClone(url, path, &git2go.CloneOptions{ - FetchOptions: git2go.FetchOptions{ + repo, err := git2go.InitRepository(path, false) + if err != nil { + return nil, fmt.Errorf("unable to init repository for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + } + defer repo.Free() + + remote, err := repo.Remotes.Create("origin", url) + if err != nil { + return nil, fmt.Errorf("unable to create remote for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + } + defer remote.Free() + + callBacks := RemoteCallbacks(ctx, opts) + err = remote.ConnectFetch(&callBacks, &git2go.ProxyOptions{Type: git2go.ProxyTypeAuto}, nil) + if err != nil { + return nil, fmt.Errorf("unable to fetch-connect to remote '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + } + defer remote.Disconnect() + + // When the last observed revision is set, check whether it is still + // the same at the remote branch. If so, short-circuit the clone operation here. + if c.LastRevision != "" { + heads, err := remote.Ls(c.Branch) + if err != nil { + return nil, fmt.Errorf("unable to remote ls for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + } + if len(heads) > 0 { + currentRevision := fmt.Sprintf("%s/%s", c.Branch, heads[0].Id.String()) + if currentRevision == c.LastRevision { + return nil, git.NoChangesError{ + Message: "no changes since last reconcilation", + ObservedRevision: currentRevision, + } + } + } + } + + // Limit the fetch operation to the specific branch, to decrease network usage. + err = remote.Fetch([]string{c.Branch}, + &git2go.FetchOptions{ DownloadTags: git2go.DownloadTagsNone, RemoteCallbacks: RemoteCallbacks(ctx, opts), ProxyOptions: git2go.ProxyOptions{Type: git2go.ProxyTypeAuto}, }, - CheckoutOptions: git2go.CheckoutOptions{ - Strategy: git2go.CheckoutForce, - }, - CheckoutBranch: c.Branch, + "") + if err != nil { + return nil, fmt.Errorf("unable to fetch remote '%s': %w", + managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + } + + branch, err := repo.References.Lookup(fmt.Sprintf("refs/remotes/origin/%s", c.Branch)) + if err != nil { + return nil, fmt.Errorf("unable to lookup branch '%s' for '%s': %w", + c.Branch, managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + } + defer branch.Free() + + upstreamCommit, err := repo.LookupCommit(branch.Target()) + if err != nil { + return nil, fmt.Errorf("unable to lookup commit '%s' for '%s': %w", + c.Branch, managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + } + defer upstreamCommit.Free() + + // Once the index has been updated with Fetch, and we know the tip commit, + // a hard reset can be used to align the local worktree with the remote branch's. + err = repo.ResetToCommit(upstreamCommit, git2go.ResetHard, &git2go.CheckoutOptions{ + Strategy: git2go.CheckoutForce, }) if err != nil { - return nil, fmt.Errorf("unable to clone '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err)) + return nil, fmt.Errorf("unable to hard reset to commit for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err)) } - defer repo.Free() + + // Use the current worktree's head as reference for the commit to be returned. head, err := repo.Head() if err != nil { return nil, fmt.Errorf("git resolve HEAD error: %w", err) } defer head.Free() + cc, err := repo.LookupCommit(head.Target()) if err != nil { return nil, fmt.Errorf("failed to lookup HEAD commit '%s' for branch '%s': %w", head.Target(), c.Branch, err) } defer cc.Free() + return buildCommit(cc, "refs/heads/"+c.Branch), nil } diff --git a/pkg/git/libgit2/checkout_test.go b/pkg/git/libgit2/checkout_test.go index 6b5ef5b3..f09594c7 100644 --- a/pkg/git/libgit2/checkout_test.go +++ b/pkg/git/libgit2/checkout_test.go @@ -77,6 +77,7 @@ func TestCheckoutBranch_Checkout(t *testing.T) { filesCreated map[string]string expectedCommit string expectedErr string + lastRevision string }{ { name: "Default branch", @@ -95,6 +96,21 @@ func TestCheckoutBranch_Checkout(t *testing.T) { branch: "invalid", expectedErr: "reference 'refs/remotes/origin/invalid' not found", }, + { + name: "skip clone - lastRevision hasn't changed", + branch: defaultBranch, + filesCreated: map[string]string{"branch": "second"}, + expectedCommit: secondCommit.String(), + lastRevision: fmt.Sprintf("%s/%s", defaultBranch, secondCommit.String()), + expectedErr: fmt.Sprintf("no changes since last reconcilation: observed revision '%s/%s'", defaultBranch, secondCommit.String()), + }, + { + name: "lastRevision is different", + branch: defaultBranch, + filesCreated: map[string]string{"branch": "second"}, + expectedCommit: secondCommit.String(), + lastRevision: fmt.Sprintf("%s/%s", defaultBranch, firstCommit.String()), + }, } for _, tt := range tests { @@ -102,7 +118,8 @@ func TestCheckoutBranch_Checkout(t *testing.T) { g := NewWithT(t) branch := CheckoutBranch{ - Branch: tt.branch, + Branch: tt.branch, + LastRevision: tt.lastRevision, } tmpDir := t.TempDir() diff --git a/pkg/git/options.go b/pkg/git/options.go index bd0b4d7b..b5e8f2c4 100644 --- a/pkg/git/options.go +++ b/pkg/git/options.go @@ -48,6 +48,11 @@ type CheckoutOptions struct { // RecurseSubmodules defines if submodules should be checked out, // not supported by all Implementations. RecurseSubmodules bool + + // LastRevision holds the revision observed on the last successful + // reconciliation. + // It is used to skip clone operations when no changes were detected. + LastRevision string } type TransportType string