Add --git-gc flag to control GC on each sync

Values:
* "auto" - run `git gc --auto` (default, respects git gc.* configs)
* "always" - run `git gc`
* "aggressive" - run `git gc --aggressive` (may require a longer timeout)
* "off" - do not run `git gc` on each sync (good for --one-time use)
This commit is contained in:
Tim Hockin 2022-02-23 16:49:38 -08:00
parent a37a758e69
commit f4d124bded
2 changed files with 156 additions and 12 deletions

View File

@ -130,6 +130,8 @@ var flGitCmd = pflag.String("git", envString("GIT_SYNC_GIT", "git"),
"the git command to run (subject to PATH search, mostly for testing)") "the git command to run (subject to PATH search, mostly for testing)")
var flGitConfig = pflag.String("git-config", envString("GIT_SYNC_GIT_CONFIG", ""), var flGitConfig = pflag.String("git-config", envString("GIT_SYNC_GIT_CONFIG", ""),
"additional git config options in 'key1:val1,key2:val2' format") "additional git config options in 'key1:val1,key2:val2' format")
// flGitGC holds the --git-gc flag value, which selects the git garbage
// collection behavior. The accepted values are checked in main; "auto" is the
// fallback passed to envString for GIT_SYNC_GIT_GC.
var flGitGC = pflag.String("git-gc", envString("GIT_SYNC_GIT_GC", "auto"),
"git garbage collection behavior: one of 'auto', 'always', 'aggressive', or 'off'")
var flHTTPBind = pflag.String("http-bind", envString("GIT_SYNC_HTTP_BIND", ""), var flHTTPBind = pflag.String("http-bind", envString("GIT_SYNC_HTTP_BIND", ""),
"the bind address (including port) for git-sync's HTTP endpoint") "the bind address (including port) for git-sync's HTTP endpoint")
@ -188,6 +190,15 @@ const (
submodulesOff submodulesMode = "off" submodulesOff submodulesMode = "off"
) )
// gcMode names the git garbage collection behavior selected with --git-gc.
type gcMode string
// The values below are untyped string constants, so they can be compared
// against both the raw string flag value and a gcMode field.
const (
// gcAuto runs `git gc --auto` after a sync.
gcAuto = "auto"
// gcAlways runs plain `git gc` after a sync.
gcAlways = "always"
// gcAggressive runs `git gc --aggressive` after a sync.
gcAggressive = "aggressive"
// gcOff skips the explicit `git gc` step entirely.
gcOff = "off"
)
func init() { func init() {
prometheus.MustRegister(syncDuration) prometheus.MustRegister(syncDuration)
prometheus.MustRegister(syncCount) prometheus.MustRegister(syncCount)
@ -258,6 +269,7 @@ type repoSync struct {
rev string // the rev or SHA to sync rev string // the rev or SHA to sync
depth int // for shallow sync depth int // for shallow sync
submodules submodulesMode // how to handle submodules submodules submodulesMode // how to handle submodules
gc gcMode // garbage collection
chmod int // mode to change repo to, or 0 chmod int // mode to change repo to, or 0
link string // the name of the symlink to publish under `root` link string // the name of the symlink to publish under `root`
authURL string // a URL to re-fetch credentials, or "" authURL string // a URL to re-fetch credentials, or ""
@ -317,6 +329,12 @@ func main() {
handleError(log, true, "ERROR: --submodules must be one of %q, %q, or %q", submodulesRecursive, submodulesShallow, submodulesOff) handleError(log, true, "ERROR: --submodules must be one of %q, %q, or %q", submodulesRecursive, submodulesShallow, submodulesOff)
} }
switch *flGitGC {
case gcAuto, gcAlways, gcAggressive, gcOff:
default:
handleError(log, true, "ERROR: --git-gc must be one of %q, %q, %q, or %q", gcAuto, gcAlways, gcAggressive, gcOff)
}
if *flRoot == "" { if *flRoot == "" {
handleError(log, true, "ERROR: --root must be specified") handleError(log, true, "ERROR: --root must be specified")
} }
@ -458,6 +476,7 @@ func main() {
rev: *flRev, rev: *flRev,
depth: *flDepth, depth: *flDepth,
submodules: submodulesMode(*flSubmodules), submodules: submodulesMode(*flSubmodules),
gc: gcMode(*flGitGC),
chmod: *flChmod, chmod: *flChmod,
link: absLink, link: absLink,
authURL: *flAskPassURL, authURL: *flAskPassURL,
@ -941,11 +960,6 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error
return err return err
} }
// GC clone
if _, err := git.run.Run(ctx, git.root, nil, git.cmd, "gc", "--prune=all"); err != nil {
return err
}
// The .git file in the worktree directory holds a reference to // The .git file in the worktree directory holds a reference to
// /git/.git/worktrees/<worktree-dir-name>. Replace it with a reference // /git/.git/worktrees/<worktree-dir-name>. Replace it with a reference
// using relative paths, so that other containers can use a different volume // using relative paths, so that other containers can use a different volume
@ -1050,19 +1064,53 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error
setRepoReady() setRepoReady()
// From here on we have to save errors until the end. // From here on we have to save errors until the end.
var cleanupErrs multiError
// Clean up previous worktrees. // Clean up previous worktree(s).
var cleanupErr error
if oldWorktree != "" { if oldWorktree != "" {
cleanupErr = git.CleanupWorkTree(ctx, git.root, oldWorktree) if err := git.CleanupWorkTree(ctx, git.root, oldWorktree); err != nil {
cleanupErrs = append(cleanupErrs, err)
}
} }
if cleanupErr != nil { // Run GC if needed.
return cleanupErr if git.gc != gcOff {
args := []string{"gc"}
switch git.gc {
case gcAuto:
args = append(args, "--auto")
case gcAlways:
// no extra flags
case gcAggressive:
args = append(args, "--aggressive")
}
if _, err := git.run.Run(ctx, git.root, nil, git.cmd, args...); err != nil {
cleanupErrs = append(cleanupErrs, err)
}
}
if len(cleanupErrs) > 0 {
return cleanupErrs
} }
return nil return nil
} }
// multiError is a list of errors that can itself be returned as a single
// error value.
type multiError []error

// Error implements the error interface. An empty list renders as
// "<no error>", a single error renders as its own message, and several
// errors render as their messages separated by "; ".
func (m multiError) Error() string {
	switch len(m) {
	case 0:
		return "<no error>"
	case 1:
		return m[0].Error()
	}

	var sb strings.Builder
	for i, err := range m {
		if i > 0 {
			sb.WriteString("; ")
		}
		sb.WriteString(err.Error())
	}
	return sb.String()
}
// CloneRepo does an initial clone of the git repo. // CloneRepo does an initial clone of the git repo.
func (git *repoSync) CloneRepo(ctx context.Context) error { func (git *repoSync) CloneRepo(ctx context.Context) error {
args := []string{"clone", "--no-checkout"} args := []string{"clone", "--no-checkout"}
@ -1663,6 +1711,18 @@ OPTIONS
Within quoted values, commas MAY be escaped, but are not required Within quoted values, commas MAY be escaped, but are not required
to be. Any other escape sequence is an error. (default: "") to be. Any other escape sequence is an error. (default: "")
--git-gc <string>, $GIT_SYNC_GIT_GC
The git garbage collection behavior: one of 'auto', 'always',
'aggressive', or 'off'. (default: auto)
- auto: Run "git gc --auto" once per successful sync. This mode
respects git's gc.* config params.
- always: Run "git gc" once per successful sync.
- aggressive: Run "git gc --aggressive" once per successful sync.
This mode can be slow and may require a longer --sync-timeout value.
- off: Disable explicit git garbage collection, which may be a good
fit when also using --one-time.
-h, --help -h, --help
Print help text and exit. Print help text and exit.

View File

@ -475,7 +475,7 @@ function e2e::worktree_cleanup() {
--branch="$MAIN_BRANCH" \ --branch="$MAIN_BRANCH" \
--rev=HEAD \ --rev=HEAD \
--root="$ROOT" \ --root="$ROOT" \
--dest="link" \ --link="link" \
>> "$1" 2>&1 & >> "$1" 2>&1 &
# wait for first sync # wait for first sync
@ -1871,11 +1871,95 @@ function e2e::github_https() {
--branch=master \ --branch=master \
--rev=HEAD \ --rev=HEAD \
--root="$ROOT" \ --root="$ROOT" \
--dest="link" \ --link="link" \
>> "$1" 2>&1 >> "$1" 2>&1
assert_file_exists "$ROOT"/link/LICENSE assert_file_exists "$ROOT"/link/LICENSE
} }
##############################################
# Test git-gc=auto
##############################################
function e2e::gc_auto() {
    # Commit content named after this test so the synced file is unambiguous.
    local marker="${FUNCNAME[0]}"
    echo "$marker" > "$REPO/file"
    git -C "$REPO" commit -qam "$marker"

    # One sync with --git-gc=auto; stdout and stderr are appended to "$1".
    local sync_args=(
        --one-time
        --repo="file://$REPO"
        --branch="$MAIN_BRANCH"
        --rev=HEAD
        --root="$ROOT"
        --link="link"
        --git-gc="auto"
    )
    GIT_SYNC "${sync_args[@]}" >> "$1" 2>&1

    # The published link must exist and carry the committed content.
    assert_link_exists "$ROOT/link"
    assert_file_exists "$ROOT/link/file"
    assert_file_eq "$ROOT/link/file" "$marker"
}
##############################################
# Test git-gc=always
##############################################
function e2e::gc_always() {
    # Commit content named after this test so the synced file is unambiguous.
    local marker="${FUNCNAME[0]}"
    echo "$marker" > "$REPO/file"
    git -C "$REPO" commit -qam "$marker"

    # One sync with --git-gc=always; stdout and stderr are appended to "$1".
    local sync_args=(
        --one-time
        --repo="file://$REPO"
        --branch="$MAIN_BRANCH"
        --rev=HEAD
        --root="$ROOT"
        --link="link"
        --git-gc="always"
    )
    GIT_SYNC "${sync_args[@]}" >> "$1" 2>&1

    # The published link must exist and carry the committed content.
    assert_link_exists "$ROOT/link"
    assert_file_exists "$ROOT/link/file"
    assert_file_eq "$ROOT/link/file" "$marker"
}
##############################################
# Test git-gc=aggressive
##############################################
function e2e::gc_aggressive() {
    # Commit content named after this test so the synced file is unambiguous.
    local marker="${FUNCNAME[0]}"
    echo "$marker" > "$REPO/file"
    git -C "$REPO" commit -qam "$marker"

    # One sync with --git-gc=aggressive; stdout and stderr are appended to "$1".
    local sync_args=(
        --one-time
        --repo="file://$REPO"
        --branch="$MAIN_BRANCH"
        --rev=HEAD
        --root="$ROOT"
        --link="link"
        --git-gc="aggressive"
    )
    GIT_SYNC "${sync_args[@]}" >> "$1" 2>&1

    # The published link must exist and carry the committed content.
    assert_link_exists "$ROOT/link"
    assert_file_exists "$ROOT/link/file"
    assert_file_eq "$ROOT/link/file" "$marker"
}
##############################################
# Test git-gc=off
##############################################
function e2e::gc_off() {
    # Commit content named after this test so the synced file is unambiguous.
    local marker="${FUNCNAME[0]}"
    echo "$marker" > "$REPO/file"
    git -C "$REPO" commit -qam "$marker"

    # One sync with --git-gc=off; stdout and stderr are appended to "$1".
    local sync_args=(
        --one-time
        --repo="file://$REPO"
        --branch="$MAIN_BRANCH"
        --rev=HEAD
        --root="$ROOT"
        --link="link"
        --git-gc="off"
    )
    GIT_SYNC "${sync_args[@]}" >> "$1" 2>&1

    # The published link must exist and carry the committed content.
    assert_link_exists "$ROOT/link"
    assert_file_exists "$ROOT/link/file"
    assert_file_eq "$ROOT/link/file" "$marker"
}
# #
# main # main
# #