gce: try to avoid concurrent IAM project operations

We set up a process-wide table of mutexes, to avoid concurrent IAM
operations on GCE projects.  Best-effort is reasonable here, we will
retry, but avoiding concurrent operations just avoids logspam and a
needless retry from self-conflicts.
This commit is contained in:
justinsb 2022-12-30 10:55:40 -05:00
parent b07168c985
commit f016c396ec
3 changed files with 79 additions and 0 deletions

View File

@ -0,0 +1,68 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mutexes
import (
"sync"
)
var InProcess LocalMutexes
// LocalMutexes is a store of named mutexes, used to avoid concurrent local operations.
// For example, GCE project IAM mutation uses a read / conditional-write approach,
// so we try to avoid making two local concurrent calls to the same project.
type LocalMutexes struct {
mutex sync.Mutex
mutexes map[string]*localMutex
}
func (m *LocalMutexes) Get(key string) LocalMutex {
m.mutex.Lock()
defer m.mutex.Unlock()
if m.mutexes == nil {
m.mutexes = make(map[string]*localMutex)
}
l := m.mutexes[key]
if l == nil {
l = &localMutex{}
m.mutexes[key] = l
}
return l
}
// LocalMutex is the interface for local mutexes.
type LocalMutex interface {
Lock()
Unlock()
}
// localMutex implements LocalMutex.
type localMutex struct {
m sync.Mutex
}
// Lock implements LocalMutex.
func (m *localMutex) Lock() {
m.m.Lock()
}
// Unlock implements LocalMutex.
func (m *localMutex) Unlock() {
m.m.Unlock()
}

View File

@ -33,6 +33,7 @@ import (
"k8s.io/kops/dnsprovider/pkg/dnsprovider" "k8s.io/kops/dnsprovider/pkg/dnsprovider"
"k8s.io/kops/dnsprovider/pkg/dnsprovider/providers/google/clouddns" "k8s.io/kops/dnsprovider/pkg/dnsprovider/providers/google/clouddns"
"k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/pkg/mutexes"
"k8s.io/kops/upup/pkg/fi" "k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata" "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata"
) )
@ -55,6 +56,11 @@ type GCECloud interface {
CloudResourceManager() *cloudresourcemanager.Service CloudResourceManager() *cloudresourcemanager.Service
} }
// MutexForProjectIAM returns a mutex to prevent local concurrent operations on project IAM.
func MutexForProjectIAM(projectID string) mutexes.LocalMutex {
return mutexes.InProcess.Get("iam/projects/" + projectID)
}
type gceCloudImplementation struct { type gceCloudImplementation struct {
compute *computeClientImpl compute *computeClientImpl
storage *storage.Service storage *storage.Service

View File

@ -104,6 +104,11 @@ func (_ *ProjectIAMBinding) RenderGCE(t *gce.GCEAPITarget, a, e, changes *Projec
member := fi.ValueOf(e.Member) member := fi.ValueOf(e.Member)
role := fi.ValueOf(e.Role) role := fi.ValueOf(e.Role)
// Avoid concurrent operations
localMutex := gce.MutexForProjectIAM(projectID)
localMutex.Lock()
defer localMutex.Unlock()
request := &cloudresourcemanager.GetIamPolicyRequest{} request := &cloudresourcemanager.GetIamPolicyRequest{}
policy, err := t.Cloud.CloudResourceManager().Projects.GetIamPolicy(projectID, request).Context(ctx).Do() policy, err := t.Cloud.CloudResourceManager().Projects.GetIamPolicy(projectID, request).Context(ctx).Do()
if err != nil { if err != nil {