From a37a758e69a77ca247e724f0c8d5a6f83f3bf354 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Wed, 23 Feb 2022 16:53:05 -0800 Subject: [PATCH 1/3] Set some gc.* git config In particular, this sets `gc.autoDetach` to "false". --- cmd/git-sync/main.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cmd/git-sync/main.go b/cmd/git-sync/main.go index c778efb..1607b1c 100644 --- a/cmd/git-sync/main.go +++ b/cmd/git-sync/main.go @@ -500,6 +500,12 @@ func main() { } } + // Set additional configs we want, but users might override. + if err := git.setupDefaultGitConfigs(ctx); err != nil { + log.Error(err, "ERROR: can't set default git configs") + os.Exit(1) + } + // This needs to be after all other git-related config flags. if *flGitConfig != "" { if err := git.setupExtraGitConfigs(ctx, *flGitConfig); err != nil { @@ -1379,6 +1385,24 @@ func (git *repoSync) CallAskPassURL(ctx context.Context) error { return nil } +func (git *repoSync) setupDefaultGitConfigs(ctx context.Context) error { + configs := []keyVal{{ + // Never auto-detach GC runs. + key: "gc.autoDetach", + val: "false", + }, { + // Fairly aggressive GC. + key: "gc.pruneExpire", + val: "now", + }} + for _, kv := range configs { + if _, err := git.run.Run(ctx, "", nil, git.cmd, "config", "--global", kv.key, kv.val); err != nil { + return fmt.Errorf("error configuring git %q %q: %v", kv.key, kv.val, err) + } + } + return nil +} + func (git *repoSync) setupExtraGitConfigs(ctx context.Context, configsFlag string) error { git.log.V(1).Info("setting additional git configs") From f4d124bded3b61392d06c8f925b3c29ca8f4d7f1 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Wed, 23 Feb 2022 16:49:38 -0800 Subject: [PATCH 2/3] Add --git-gc flag to control GC on each sync Values: * "auto" - run `git gc --auto` (default, respects git gc.* configs) * "always" - run `git gc` * "aggressive" - run `git gc --aggressive` (may require a longer timeout) * "off" - do not run `git gc` on each sync (good for --one-time use) --- cmd/git-sync/main.go | 80 +++++++++++++++++++++++++++++++++++----- test_e2e.sh | 88 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 12 deletions(-) diff --git a/cmd/git-sync/main.go b/cmd/git-sync/main.go index 1607b1c..81c789c 100644 --- a/cmd/git-sync/main.go +++ b/cmd/git-sync/main.go @@ -130,6 +130,8 @@ var flGitCmd = pflag.String("git", envString("GIT_SYNC_GIT", "git"), "the git command to run (subject to PATH search, mostly for testing)") var flGitConfig = pflag.String("git-config", envString("GIT_SYNC_GIT_CONFIG", ""), "additional git config options in 'key1:val1,key2:val2' format") +var flGitGC = pflag.String("git-gc", envString("GIT_SYNC_GIT_GC", "auto"), + "git garbage collection behavior: one of 'auto', 'always', 'aggressive', or 'off'") var flHTTPBind = pflag.String("http-bind", envString("GIT_SYNC_HTTP_BIND", ""), "the bind address (including port) for git-sync's HTTP endpoint") @@ -188,6 +190,15 @@ const ( submodulesOff submodulesMode = "off" ) +type gcMode string + +const ( + gcAuto = "auto" + gcAlways = "always" + gcAggressive = "aggressive" + gcOff = "off" +) + func init() { prometheus.MustRegister(syncDuration) prometheus.MustRegister(syncCount) @@ -258,6 +269,7 @@ type repoSync struct { rev string // the rev or SHA to sync depth int // for shallow sync submodules submodulesMode // how to handle submodules + gc gcMode // garbage collection chmod int // mode to change repo to, or 0 link string // the name of the symlink to publish under `root` authURL string // a URL to re-fetch credentials, or "" @@ -317,6 +329,12 @@ func main() { handleError(log, true, "ERROR: --submodules must be one of %q, %q, or %q", submodulesRecursive, submodulesShallow, submodulesOff) } + switch *flGitGC { + case gcAuto, gcAlways, gcAggressive, gcOff: + default: + handleError(log, true, "ERROR: --git-gc must be one of %q, %q, %q, or %q", gcAuto, gcAlways, gcAggressive, gcOff) + } + if *flRoot == "" { handleError(log, true, "ERROR: --root must be specified") } @@ -458,6 +476,7 @@ func main() { rev: *flRev, depth: *flDepth, submodules: submodulesMode(*flSubmodules), + gc: gcMode(*flGitGC), chmod: *flChmod, link: absLink, authURL: *flAskPassURL, @@ -941,11 +960,6 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error return err } - // GC clone - if _, err := git.run.Run(ctx, git.root, nil, git.cmd, "gc", "--prune=all"); err != nil { - return err - } - // The .git file in the worktree directory holds a reference to // /git/.git/worktrees/. Replace it with a reference // using relative paths, so that other containers can use a different volume @@ -1050,19 +1064,53 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error setRepoReady() // From here on we have to save errors until the end. + var cleanupErrs multiError - // Clean up previous worktrees. - var cleanupErr error + // Clean up previous worktree(s). if oldWorktree != "" { - cleanupErr = git.CleanupWorkTree(ctx, git.root, oldWorktree) + if err := git.CleanupWorkTree(ctx, git.root, oldWorktree); err != nil { + cleanupErrs = append(cleanupErrs, err) + } } - if cleanupErr != nil { - return cleanupErr + // Run GC if needed. + if git.gc != gcOff { + args := []string{"gc"} + switch git.gc { + case gcAuto: + args = append(args, "--auto") + case gcAlways: + // no extra flags + case gcAggressive: + args = append(args, "--aggressive") + } + if _, err := git.run.Run(ctx, git.root, nil, git.cmd, args...); err != nil { + cleanupErrs = append(cleanupErrs, err) + } + } + + if len(cleanupErrs) > 0 { + return cleanupErrs } return nil } +type multiError []error + +func (m multiError) Error() string { + if len(m) == 0 { + return "" + } + if len(m) == 1 { + return m[0].Error() + } + strs := make([]string, 0, len(m)) + for _, e := range m { + strs = append(strs, e.Error()) + } + return strings.Join(strs, "; ") +} + // CloneRepo does an initial clone of the git repo. func (git *repoSync) CloneRepo(ctx context.Context) error { args := []string{"clone", "--no-checkout"} @@ -1663,6 +1711,18 @@ OPTIONS Within quoted values, commas MAY be escaped, but are not required to be. Any other escape sequence is an error. (default: "") + --git-gc , $GIT_SYNC_GIT_GC + The git garbage collection behavior: one of 'auto', 'always', + 'aggressive', or 'off'. (default: auto) + + - auto: Run "git gc --auto" once per successful sync. This mode + respects git's gc.* config params. + - always: Run "git gc" once per successful sync. + - aggressive: Run "git gc --aggressive" once per successful sync. + This mode can be slow and may require a longer --sync-timeout value. + - off: Disable explicit git garbage collection, which may be a good + fit when also using --one-time. + -h, --help Print help text and exit. diff --git a/test_e2e.sh b/test_e2e.sh index 2e2b98c..cdf6410 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -475,7 +475,7 @@ function e2e::worktree_cleanup() { --branch="$MAIN_BRANCH" \ --rev=HEAD \ --root="$ROOT" \ - --dest="link" \ + --link="link" \ >> "$1" 2>&1 & # wait for first sync @@ -1871,11 +1871,95 @@ function e2e::github_https() { --branch=master \ --rev=HEAD \ --root="$ROOT" \ - --dest="link" \ + --link="link" \ >> "$1" 2>&1 assert_file_exists "$ROOT"/link/LICENSE } +############################################## +# Test git-gc=auto +############################################## +function e2e::gc_auto() { + echo "$FUNCNAME" > "$REPO"/file + git -C "$REPO" commit -qam "$FUNCNAME" + + GIT_SYNC \ + --one-time \ + --repo="file://$REPO" \ + --branch="$MAIN_BRANCH" \ + --rev=HEAD \ + --root="$ROOT" \ + --link="link" \ + --git-gc="auto" \ + >> "$1" 2>&1 + assert_link_exists "$ROOT"/link + assert_file_exists "$ROOT"/link/file + assert_file_eq "$ROOT"/link/file "$FUNCNAME" +} + +############################################## +# Test git-gc=always +############################################## +function e2e::gc_always() { + echo "$FUNCNAME" > "$REPO"/file + git -C "$REPO" commit -qam "$FUNCNAME" + + GIT_SYNC \ + --one-time \ + --repo="file://$REPO" \ + --branch="$MAIN_BRANCH" \ + --rev=HEAD \ + --root="$ROOT" \ + --link="link" \ + --git-gc="always" \ + >> "$1" 2>&1 + assert_link_exists "$ROOT"/link + assert_file_exists "$ROOT"/link/file + assert_file_eq "$ROOT"/link/file "$FUNCNAME" +} + +############################################## +# Test git-gc=aggressive +############################################## +function e2e::gc_aggressive() { + echo "$FUNCNAME" > "$REPO"/file + git -C "$REPO" commit -qam "$FUNCNAME" + + GIT_SYNC \ + --one-time \ + --repo="file://$REPO" \ + --branch="$MAIN_BRANCH" \ + --rev=HEAD \ + --root="$ROOT" \ + --link="link" \ + --git-gc="aggressive" \ + >> "$1" 2>&1 + assert_link_exists "$ROOT"/link + assert_file_exists "$ROOT"/link/file + assert_file_eq "$ROOT"/link/file "$FUNCNAME" +} + +############################################## +# Test git-gc=off +############################################## +function e2e::gc_off() { + echo "$FUNCNAME" > "$REPO"/file + git -C "$REPO" commit -qam "$FUNCNAME" + + GIT_SYNC \ + --one-time \ + --repo="file://$REPO" \ + --branch="$MAIN_BRANCH" \ + --rev=HEAD \ + --root="$ROOT" \ + --link="link" \ + --git-gc="off" \ + >> "$1" 2>&1 + assert_link_exists "$ROOT"/link + assert_file_exists "$ROOT"/link/file + assert_file_eq "$ROOT"/link/file "$FUNCNAME" +} + # # main # From 8c5f33d5ddb20f5fdea3083aa61627ca5c4caf45 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Thu, 24 Feb 2022 09:00:57 -0800 Subject: [PATCH 3/3] Make e2e treat args as tests-name regexes --- test_e2e.sh | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/test_e2e.sh b/test_e2e.sh index cdf6410..650f3c3 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -1978,35 +1978,53 @@ function list_tests() { } # Figure out which, if any, tests to run. -tests=($(list_tests)) +all_tests=($(list_tests)) +tests_to_run=() function print_tests() { echo "available tests:" - for t in "${tests[@]}"; do + for t in "${all_tests[@]}"; do echo " $t" done } -for t; do +# Validate and accumulate tests to run if args are specified. +for arg; do # Use -? to list known tests. - if [[ "${t}" == "-?" ]]; then + if [[ "${arg}" == "-?" ]]; then print_tests exit 0 fi - # Make sure we know this test. - if [[ " ${tests[*]} " =~ " ${t} " ]]; then - continue + if [[ "${arg}" =~ ^- ]]; then + echo "ERROR: unknown flag '${arg}'" + exit 1 fi - # Not a known test or flag. - echo "ERROR: unknown test or flag: '${t}'" - echo - print_tests - exit 1 + # Make sure each non-flag arg matches at least one test. + nmatches=0 + for t in "${all_tests[@]}"; do + if [[ "${t}" =~ ${arg} ]]; then + nmatches=$((nmatches+1)) + # Don't run tests twice, just keep the first match. + if [[ " ${tests_to_run[*]} " =~ " ${t} " ]]; then + continue + fi + tests_to_run+=("${t}") + continue + fi + done + if [[ ${nmatches} == 0 ]]; then + echo "ERROR: no tests match pattern '${arg}'" + echo + print_tests + exit 1 + fi + tests_to_run+=("${matches[@]}") done +set -- "${tests_to_run[@]}" -# If no tests specified, run them all. +# If no tests were specified, run them all. if [[ "$#" == 0 ]]; then - set -- "${tests[@]}" + set -- "${all_tests[@]}" fi # Build it