Add --git-gc flag to control GC on each sync

Values:
* "auto" - run `git gc --auto` (default, respects git gc.* configs)
* "always" - run `git gc`
* "aggressive" - run `git gc --aggressive` (may require a longer timeout)
* "off" - do not run `git gc` on each sync (good for --one-time use)
This commit is contained in:
Tim Hockin 2022-02-23 16:49:38 -08:00
parent a37a758e69
commit f4d124bded
2 changed files with 156 additions and 12 deletions

View File

@ -130,6 +130,8 @@ var flGitCmd = pflag.String("git", envString("GIT_SYNC_GIT", "git"),
"the git command to run (subject to PATH search, mostly for testing)")
var flGitConfig = pflag.String("git-config", envString("GIT_SYNC_GIT_CONFIG", ""),
"additional git config options in 'key1:val1,key2:val2' format")
var flGitGC = pflag.String("git-gc", envString("GIT_SYNC_GIT_GC", "auto"),
"git garbage collection behavior: one of 'auto', 'always', 'aggressive', or 'off'")
var flHTTPBind = pflag.String("http-bind", envString("GIT_SYNC_HTTP_BIND", ""),
"the bind address (including port) for git-sync's HTTP endpoint")
@ -188,6 +190,15 @@ const (
submodulesOff submodulesMode = "off"
)
type gcMode string
const (
gcAuto = "auto"
gcAlways = "always"
gcAggressive = "aggressive"
gcOff = "off"
)
func init() {
prometheus.MustRegister(syncDuration)
prometheus.MustRegister(syncCount)
@ -258,6 +269,7 @@ type repoSync struct {
rev string // the rev or SHA to sync
depth int // for shallow sync
submodules submodulesMode // how to handle submodules
gc gcMode // garbage collection
chmod int // mode to change repo to, or 0
link string // the name of the symlink to publish under `root`
authURL string // a URL to re-fetch credentials, or ""
@ -317,6 +329,12 @@ func main() {
handleError(log, true, "ERROR: --submodules must be one of %q, %q, or %q", submodulesRecursive, submodulesShallow, submodulesOff)
}
switch *flGitGC {
case gcAuto, gcAlways, gcAggressive, gcOff:
default:
handleError(log, true, "ERROR: --git-gc must be one of %q, %q, %q, or %q", gcAuto, gcAlways, gcAggressive, gcOff)
}
if *flRoot == "" {
handleError(log, true, "ERROR: --root must be specified")
}
@ -458,6 +476,7 @@ func main() {
rev: *flRev,
depth: *flDepth,
submodules: submodulesMode(*flSubmodules),
gc: gcMode(*flGitGC),
chmod: *flChmod,
link: absLink,
authURL: *flAskPassURL,
@ -941,11 +960,6 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error
return err
}
// GC clone
if _, err := git.run.Run(ctx, git.root, nil, git.cmd, "gc", "--prune=all"); err != nil {
return err
}
// The .git file in the worktree directory holds a reference to
// /git/.git/worktrees/<worktree-dir-name>. Replace it with a reference
// using relative paths, so that other containers can use a different volume
@ -1050,19 +1064,53 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error
setRepoReady()
// From here on we have to save errors until the end.
var cleanupErrs multiError
// Clean up previous worktrees.
var cleanupErr error
// Clean up previous worktree(s).
if oldWorktree != "" {
cleanupErr = git.CleanupWorkTree(ctx, git.root, oldWorktree)
if err := git.CleanupWorkTree(ctx, git.root, oldWorktree); err != nil {
cleanupErrs = append(cleanupErrs, err)
}
}
if cleanupErr != nil {
return cleanupErr
// Run GC if needed.
if git.gc != gcOff {
args := []string{"gc"}
switch git.gc {
case gcAuto:
args = append(args, "--auto")
case gcAlways:
// no extra flags
case gcAggressive:
args = append(args, "--aggressive")
}
if _, err := git.run.Run(ctx, git.root, nil, git.cmd, args...); err != nil {
cleanupErrs = append(cleanupErrs, err)
}
}
if len(cleanupErrs) > 0 {
return cleanupErrs
}
return nil
}
type multiError []error
func (m multiError) Error() string {
if len(m) == 0 {
return "<no error>"
}
if len(m) == 1 {
return m[0].Error()
}
strs := make([]string, 0, len(m))
for _, e := range m {
strs = append(strs, e.Error())
}
return strings.Join(strs, "; ")
}
// CloneRepo does an initial clone of the git repo.
func (git *repoSync) CloneRepo(ctx context.Context) error {
args := []string{"clone", "--no-checkout"}
@ -1663,6 +1711,18 @@ OPTIONS
Within quoted values, commas MAY be escaped, but are not required
to be. Any other escape sequence is an error. (default: "")
--git-gc <string>, $GIT_SYNC_GIT_GC
The git garbage collection behavior: one of 'auto', 'always',
'aggressive', or 'off'. (default: auto)
- auto: Run "git gc --auto" once per successful sync. This mode
respects git's gc.* config params.
- always: Run "git gc" once per successful sync.
- aggressive: Run "git gc --aggressive" once per successful sync.
This mode can be slow and may require a longer --sync-timeout value.
- off: Disable explicit git garbage collection, which may be a good
fit when also using --one-time.
-h, --help
Print help text and exit.

View File

@ -475,7 +475,7 @@ function e2e::worktree_cleanup() {
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--dest="link" \
--link="link" \
>> "$1" 2>&1 &
# wait for first sync
@ -1871,11 +1871,95 @@ function e2e::github_https() {
--branch=master \
--rev=HEAD \
--root="$ROOT" \
--dest="link" \
--link="link" \
>> "$1" 2>&1
assert_file_exists "$ROOT"/link/LICENSE
}
##############################################
# Test git-gc=auto
##############################################
function e2e::gc_auto() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="auto" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
##############################################
# Test git-gc=always
##############################################
function e2e::gc_always() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="always" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
##############################################
# Test git-gc=aggressive
##############################################
function e2e::gc_aggressive() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="aggressive" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
##############################################
# Test git-gc=off
##############################################
function e2e::gc_off() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="off" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
#
# main
#