mirror of https://github.com/kubernetes/kops.git
GCE: Fix race around route deletion
Because the control-plane can recreate routes, there's a race between deleting instances and deleting routes. Add a dependency so we don't try to delete routes until after we've deleted all the instances.
This commit is contained in:
parent
3e683ca9e8
commit
cfd4e91a2c
|
@ -15,7 +15,6 @@ go_library(
|
||||||
"//upup/pkg/fi/cloudup/gce:go_default_library",
|
"//upup/pkg/fi/cloudup/gce:go_default_library",
|
||||||
"//vendor/google.golang.org/api/compute/v1:go_default_library",
|
"//vendor/google.golang.org/api/compute/v1:go_default_library",
|
||||||
"//vendor/google.golang.org/api/dns/v1:go_default_library",
|
"//vendor/google.golang.org/api/dns/v1:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
|
|
||||||
"//vendor/k8s.io/klog/v2:go_default_library",
|
"//vendor/k8s.io/klog/v2:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
@ -23,7 +23,6 @@ import (
|
||||||
|
|
||||||
compute "google.golang.org/api/compute/v1"
|
compute "google.golang.org/api/compute/v1"
|
||||||
clouddns "google.golang.org/api/dns/v1"
|
clouddns "google.golang.org/api/dns/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"k8s.io/kops/pkg/dns"
|
"k8s.io/kops/pkg/dns"
|
||||||
"k8s.io/kops/pkg/resources"
|
"k8s.io/kops/pkg/resources"
|
||||||
|
@ -57,6 +56,8 @@ const maxPrefixTokens = 5
|
||||||
const maxGCERouteNameLength = 63
|
const maxGCERouteNameLength = 63
|
||||||
|
|
||||||
func ListResourcesGCE(gceCloud gce.GCECloud, clusterName string, region string) (map[string]*resources.Resource, error) {
|
func ListResourcesGCE(gceCloud gce.GCECloud, clusterName string, region string) (map[string]*resources.Resource, error) {
|
||||||
|
ctx := context.TODO()
|
||||||
|
|
||||||
if region == "" {
|
if region == "" {
|
||||||
region = gceCloud.Region()
|
region = gceCloud.Region()
|
||||||
}
|
}
|
||||||
|
@ -71,7 +72,7 @@ func ListResourcesGCE(gceCloud gce.GCECloud, clusterName string, region string)
|
||||||
|
|
||||||
{
|
{
|
||||||
// TODO: Only zones in api.Cluster object, if we have one?
|
// TODO: Only zones in api.Cluster object, if we have one?
|
||||||
gceZones, err := d.gceCloud.Compute().Zones().List(context.Background(), d.gceCloud.Project())
|
gceZones, err := d.gceCloud.Compute().Zones().List(ctx, d.gceCloud.Project())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error listing zones: %v", err)
|
return nil, fmt.Errorf("error listing zones: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -116,10 +117,8 @@ func ListResourcesGCE(gceCloud gce.GCECloud, clusterName string, region string)
|
||||||
}
|
}
|
||||||
|
|
||||||
// We try to clean up orphaned routes.
|
// We try to clean up orphaned routes.
|
||||||
// Technically we still have a race condition here - until the master(s) are terminated, they will keep
|
|
||||||
// creating routes. Another option might be to have a post-destroy cleanup, and only remove routes with no target.
|
|
||||||
{
|
{
|
||||||
resourceTrackers, err := d.listRoutes(resources)
|
resourceTrackers, err := d.listRoutes(ctx, resources)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -557,20 +556,18 @@ func deleteFirewallRule(cloud fi.Cloud, r *resources.Resource) error {
|
||||||
return c.WaitForOp(op)
|
return c.WaitForOp(op)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *clusterDiscoveryGCE) listRoutes(resourceMap map[string]*resources.Resource) ([]*resources.Resource, error) {
|
func (d *clusterDiscoveryGCE) listRoutes(ctx context.Context, resourceMap map[string]*resources.Resource) ([]*resources.Resource, error) {
|
||||||
c := d.gceCloud
|
c := d.gceCloud
|
||||||
|
|
||||||
var resourceTrackers []*resources.Resource
|
var resourceTrackers []*resources.Resource
|
||||||
|
|
||||||
instances := sets.NewString()
|
instancesToDelete := make(map[string]*resources.Resource)
|
||||||
for _, resource := range resourceMap {
|
for _, resource := range resourceMap {
|
||||||
if resource.Type == typeInstance {
|
if resource.Type == typeInstance {
|
||||||
instances.Insert(resource.ID)
|
instancesToDelete[resource.ID] = resource
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
// TODO: Push-down prefix?
|
// TODO: Push-down prefix?
|
||||||
routes, err := c.Compute().Routes().List(ctx, c.Project())
|
routes, err := c.Compute().Routes().List(ctx, c.Project())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -596,7 +593,7 @@ func (d *clusterDiscoveryGCE) listRoutes(resourceMap map[string]*resources.Resou
|
||||||
klog.Warningf("error parsing URL for NextHopInstance=%q", r.NextHopInstance)
|
klog.Warningf("error parsing URL for NextHopInstance=%q", r.NextHopInstance)
|
||||||
}
|
}
|
||||||
|
|
||||||
if instances.Has(u.Zone + "/" + u.Name) {
|
if instancesToDelete[u.Zone+"/"+u.Name] != nil {
|
||||||
remove = true
|
remove = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -610,10 +607,11 @@ func (d *clusterDiscoveryGCE) listRoutes(resourceMap map[string]*resources.Resou
|
||||||
Obj: r,
|
Obj: r,
|
||||||
}
|
}
|
||||||
|
|
||||||
// We don't need to block
|
// To avoid race conditions where the control-plane re-adds the routes, we delete routes
|
||||||
//if r.NextHopInstance != "" {
|
// only after we have deleted all the instances.
|
||||||
// resourceTracker.Blocked = append(resourceTracker.Blocks, typeInstance+":"+gce.LastComponent(r.NextHopInstance))
|
for _, instance := range instancesToDelete {
|
||||||
//}
|
resourceTracker.Blocked = append(resourceTracker.Blocked, typeInstance+":"+instance.ID)
|
||||||
|
}
|
||||||
|
|
||||||
klog.V(4).Infof("Found resource: %s", r.SelfLink)
|
klog.V(4).Infof("Found resource: %s", r.SelfLink)
|
||||||
resourceTrackers = append(resourceTrackers, resourceTracker)
|
resourceTrackers = append(resourceTrackers, resourceTracker)
|
||||||
|
|
Loading…
Reference in New Issue