Merge pull request #496 from thockin/master

Add GC controls, e2e regexes (v4 branch)
This commit is contained in:
Kubernetes Prow Robot 2022-03-08 03:48:33 -08:00 committed by GitHub
commit e16e919af1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 212 additions and 26 deletions

View File

@ -130,6 +130,8 @@ var flGitCmd = pflag.String("git", envString("GIT_SYNC_GIT", "git"),
"the git command to run (subject to PATH search, mostly for testing)")
var flGitConfig = pflag.String("git-config", envString("GIT_SYNC_GIT_CONFIG", ""),
"additional git config options in 'key1:val1,key2:val2' format")
var flGitGC = pflag.String("git-gc", envString("GIT_SYNC_GIT_GC", "auto"),
"git garbage collection behavior: one of 'auto', 'always', 'aggressive', or 'off'")
var flHTTPBind = pflag.String("http-bind", envString("GIT_SYNC_HTTP_BIND", ""),
"the bind address (including port) for git-sync's HTTP endpoint")
@ -188,6 +190,15 @@ const (
submodulesOff submodulesMode = "off"
)
type gcMode string
const (
gcAuto = "auto"
gcAlways = "always"
gcAggressive = "aggressive"
gcOff = "off"
)
func init() {
prometheus.MustRegister(syncDuration)
prometheus.MustRegister(syncCount)
@ -258,6 +269,7 @@ type repoSync struct {
rev string // the rev or SHA to sync
depth int // for shallow sync
submodules submodulesMode // how to handle submodules
gc gcMode // garbage collection
chmod int // mode to change repo to, or 0
link string // the name of the symlink to publish under `root`
authURL string // a URL to re-fetch credentials, or ""
@ -317,6 +329,12 @@ func main() {
handleError(log, true, "ERROR: --submodules must be one of %q, %q, or %q", submodulesRecursive, submodulesShallow, submodulesOff)
}
switch *flGitGC {
case gcAuto, gcAlways, gcAggressive, gcOff:
default:
handleError(log, true, "ERROR: --git-gc must be one of %q, %q, %q, or %q", gcAuto, gcAlways, gcAggressive, gcOff)
}
if *flRoot == "" {
handleError(log, true, "ERROR: --root must be specified")
}
@ -458,6 +476,7 @@ func main() {
rev: *flRev,
depth: *flDepth,
submodules: submodulesMode(*flSubmodules),
gc: gcMode(*flGitGC),
chmod: *flChmod,
link: absLink,
authURL: *flAskPassURL,
@ -500,6 +519,12 @@ func main() {
}
}
// Set additional configs we want, but users might override.
if err := git.setupDefaultGitConfigs(ctx); err != nil {
log.Error(err, "ERROR: can't set default git configs")
os.Exit(1)
}
// This needs to be after all other git-related config flags.
if *flGitConfig != "" {
if err := git.setupExtraGitConfigs(ctx, *flGitConfig); err != nil {
@ -935,11 +960,6 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error
return err
}
// GC clone
if _, err := git.run.Run(ctx, git.root, nil, git.cmd, "gc", "--prune=all"); err != nil {
return err
}
// The .git file in the worktree directory holds a reference to
// /git/.git/worktrees/<worktree-dir-name>. Replace it with a reference
// using relative paths, so that other containers can use a different volume
@ -1044,19 +1064,53 @@ func (git *repoSync) AddWorktreeAndSwap(ctx context.Context, hash string) error
setRepoReady()
// From here on we have to save errors until the end.
var cleanupErrs multiError
// Clean up previous worktrees.
var cleanupErr error
// Clean up previous worktree(s).
if oldWorktree != "" {
cleanupErr = git.CleanupWorkTree(ctx, git.root, oldWorktree)
if err := git.CleanupWorkTree(ctx, git.root, oldWorktree); err != nil {
cleanupErrs = append(cleanupErrs, err)
}
}
if cleanupErr != nil {
return cleanupErr
// Run GC if needed.
if git.gc != gcOff {
args := []string{"gc"}
switch git.gc {
case gcAuto:
args = append(args, "--auto")
case gcAlways:
// no extra flags
case gcAggressive:
args = append(args, "--aggressive")
}
if _, err := git.run.Run(ctx, git.root, nil, git.cmd, args...); err != nil {
cleanupErrs = append(cleanupErrs, err)
}
}
if len(cleanupErrs) > 0 {
return cleanupErrs
}
return nil
}
type multiError []error
func (m multiError) Error() string {
if len(m) == 0 {
return "<no error>"
}
if len(m) == 1 {
return m[0].Error()
}
strs := make([]string, 0, len(m))
for _, e := range m {
strs = append(strs, e.Error())
}
return strings.Join(strs, "; ")
}
// CloneRepo does an initial clone of the git repo.
func (git *repoSync) CloneRepo(ctx context.Context) error {
args := []string{"clone", "--no-checkout"}
@ -1379,6 +1433,24 @@ func (git *repoSync) CallAskPassURL(ctx context.Context) error {
return nil
}
func (git *repoSync) setupDefaultGitConfigs(ctx context.Context) error {
configs := []keyVal{{
// Never auto-detach GC runs.
key: "gc.autoDetach",
val: "false",
}, {
// Fairly aggressive GC.
key: "gc.pruneExpire",
val: "now",
}}
for _, kv := range configs {
if _, err := git.run.Run(ctx, "", nil, git.cmd, "config", "--global", kv.key, kv.val); err != nil {
return fmt.Errorf("error configuring git %q %q: %v", kv.key, kv.val, err)
}
}
return nil
}
func (git *repoSync) setupExtraGitConfigs(ctx context.Context, configsFlag string) error {
git.log.V(1).Info("setting additional git configs")
@ -1639,6 +1711,18 @@ OPTIONS
Within quoted values, commas MAY be escaped, but are not required
to be. Any other escape sequence is an error. (default: "")
--git-gc <string>, $GIT_SYNC_GIT_GC
The git garbage collection behavior: one of 'auto', 'always',
'aggressive', or 'off'. (default: auto)
- auto: Run "git gc --auto" once per successful sync. This mode
respects git's gc.* config params.
- always: Run "git gc" once per successful sync.
- aggressive: Run "git gc --aggressive" once per successful sync.
This mode can be slow and may require a longer --sync-timeout value.
- off: Disable explicit git garbage collection, which may be a good
fit when also using --one-time.
-h, --help
Print help text and exit.

View File

@ -475,7 +475,7 @@ function e2e::worktree_cleanup() {
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--dest="link" \
--link="link" \
>> "$1" 2>&1 &
# wait for first sync
@ -1871,11 +1871,95 @@ function e2e::github_https() {
--branch=master \
--rev=HEAD \
--root="$ROOT" \
--dest="link" \
--link="link" \
>> "$1" 2>&1
assert_file_exists "$ROOT"/link/LICENSE
}
##############################################
# Test git-gc=auto
##############################################
function e2e::gc_auto() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="auto" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
##############################################
# Test git-gc=always
##############################################
function e2e::gc_always() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="always" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
##############################################
# Test git-gc=aggressive
##############################################
function e2e::gc_aggressive() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="aggressive" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
##############################################
# Test git-gc=off
##############################################
function e2e::gc_off() {
echo "$FUNCNAME" > "$REPO"/file
git -C "$REPO" commit -qam "$FUNCNAME"
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch="$MAIN_BRANCH" \
--rev=HEAD \
--root="$ROOT" \
--link="link" \
--git-gc="off" \
>> "$1" 2>&1
assert_link_exists "$ROOT"/link
assert_file_exists "$ROOT"/link/file
assert_file_eq "$ROOT"/link/file "$FUNCNAME"
}
#
# main
#
@ -1894,35 +1978,53 @@ function list_tests() {
}
# Figure out which, if any, tests to run.
tests=($(list_tests))
all_tests=($(list_tests))
tests_to_run=()
function print_tests() {
echo "available tests:"
for t in "${tests[@]}"; do
for t in "${all_tests[@]}"; do
echo " $t"
done
}
for t; do
# Validate and accumulate tests to run if args are specified.
for arg; do
# Use -? to list known tests.
if [[ "${t}" == "-?" ]]; then
if [[ "${arg}" == "-?" ]]; then
print_tests
exit 0
fi
# Make sure we know this test.
if [[ " ${tests[*]} " =~ " ${t} " ]]; then
continue
if [[ "${arg}" =~ ^- ]]; then
echo "ERROR: unknown flag '${arg}'"
exit 1
fi
# Not a known test or flag.
echo "ERROR: unknown test or flag: '${t}'"
echo
print_tests
exit 1
# Make sure each non-flag arg matches at least one test.
nmatches=0
for t in "${all_tests[@]}"; do
if [[ "${t}" =~ ${arg} ]]; then
nmatches=$((nmatches+1))
# Don't run tests twice, just keep the first match.
if [[ " ${tests_to_run[*]} " =~ " ${t} " ]]; then
continue
fi
tests_to_run+=("${t}")
continue
fi
done
if [[ ${nmatches} == 0 ]]; then
echo "ERROR: no tests match pattern '${arg}'"
echo
print_tests
exit 1
fi
tests_to_run+=("${matches[@]}")
done
set -- "${tests_to_run[@]}"
# If no tests specified, run them all.
# If no tests were specified, run them all.
if [[ "$#" == 0 ]]; then
set -- "${tests[@]}"
set -- "${all_tests[@]}"
fi
# Build it