mirror of https://github.com/knative/pkg.git
Add cluster deletion function (#674)
* Add cluster deletion function

* Apply suggestions from code review

Co-Authored-By: Adriano Cunha <35786489+adrcunha@users.noreply.github.com>
This commit is contained in:
parent
1bea34570b
commit
55634011c4
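At a glance, the commit adds a Delete() step to the cluster lifecycle. A minimal sketch of how a caller might drive the extended ClusterOperations interface (the package name `example`, the helper runWithCluster, and its error handling are illustrative, not part of the commit; the import path follows the repository layout):

	package example

	import (
		"log"

		"knative.dev/pkg/testutils/clustermanager"
	)

	// runWithCluster is a hypothetical helper driving the lifecycle
	// defined by ClusterOperations as extended in this commit.
	func runWithCluster(ops clustermanager.ClusterOperations) {
		if err := ops.Initialize(); err != nil {
			log.Fatalf("initialize failed: %v", err)
		}
		if err := ops.Acquire(); err != nil {
			log.Fatalf("acquire failed: %v", err)
		}
		// New in this commit: Delete releases the Boskos project when
		// running in Prow, or tears down a locally created cluster.
		defer func() {
			if err := ops.Delete(); err != nil {
				log.Printf("cleanup failed: %v", err)
			}
		}()
		log.Printf("cluster ready on provider %q", ops.Provider())
	}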
@@ -23,16 +23,32 @@ import (
 	"knative.dev/pkg/testutils/clustermanager/boskos"
 )
 
+const (
+	fakeOwner = "fake-owner"
+)
+
 // FakeBoskosClient implements boskos.Operation
 type FakeBoskosClient struct {
 	resources []*boskoscommon.Resource
 }
 
+func (c *FakeBoskosClient) getOwner(host *string) string {
+	if nil == host {
+		return fakeOwner
+	}
+	return *host
+}
+
+func (c *FakeBoskosClient) GetResources() []*boskoscommon.Resource {
+	return c.resources
+}
+
 // AcquireGKEProject fakes to be no op
 func (c *FakeBoskosClient) AcquireGKEProject(host *string) (*boskoscommon.Resource, error) {
 	for _, res := range c.resources {
 		if res.State == boskoscommon.Free {
 			res.State = boskoscommon.Busy
+			res.Owner = c.getOwner(host)
 			return res, nil
 		}
 	}
@@ -41,18 +57,15 @@ func (c *FakeBoskosClient) AcquireGKEProject(host *string) (*boskoscommon.Resour
 
 // ReleaseGKEProject fakes to be no op
 func (c *FakeBoskosClient) ReleaseGKEProject(host *string, name string) error {
-	if nil == host {
-		return fmt.Errorf("host has to be set")
-	}
-
+	owner := c.getOwner(host)
 	for _, res := range c.resources {
 		if res.Name == name {
-			if res.Owner == *host {
+			if res.Owner == owner {
 				res.Owner = ""
 				res.State = boskoscommon.Free
 				return nil
 			} else {
-				return fmt.Errorf("Got owner: '%s', expect owner: '%s'", res.Owner, *host)
+				return fmt.Errorf("Got owner: '%s', expect owner: '%s'", res.Owner, owner)
 			}
 		}
 	}
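A sketch (not part of the commit) of the nil-host defaulting these changes introduce: with a nil host, acquire and release both fall back to fakeOwner, so a paired acquire/release round-trips the resource back to Free. It assumes it lives inside the fake package, and uses NewGKEProject, the seeding helper the tests below rely on:

	package fake

	import "testing"

	// TestOwnerDefaulting is illustrative only.
	func TestOwnerDefaulting(t *testing.T) {
		c := &FakeBoskosClient{}
		c.NewGKEProject("test-project")
		// Owner defaults to fakeOwner when host is nil.
		res, err := c.AcquireGKEProject(nil)
		if err != nil {
			t.Fatalf("acquire: %v", err)
		}
		// Releasing with the same default owner succeeds and frees the resource.
		if err := c.ReleaseGKEProject(nil, res.Name); err != nil {
			t.Fatalf("release: %v", err)
		}
	}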
@@ -26,4 +26,5 @@ type ClusterOperations interface {
 	Provider() string
 	Initialize() error
 	Acquire() error
+	Delete() error
 }
@@ -46,6 +46,7 @@ var (
 	protectedClusters = []string{"knative-prow"}
 	// These are arbitrary numbers determined based on past experience
 	creationTimeout = 20 * time.Minute
+	deletionTimeout = 10 * time.Minute
 )
 
 // GKEClient implements Client
@@ -77,6 +78,7 @@ type GKECluster struct {
 // GKESDKOperations wraps GKE SDK related functions
 type GKESDKOperations interface {
 	create(string, string, *container.CreateClusterRequest) (*container.Operation, error)
+	delete(string, string, string) (*container.Operation, error)
 	get(string, string, string) (*container.Cluster, error)
 	getOperation(string, string, string) (*container.Operation, error)
 }
@@ -91,6 +93,12 @@ func (gsc *GKESDKClient) create(project, location string, rb *container.CreateCl
 	return gsc.Projects.Locations.Clusters.Create(parent, rb).Context(context.Background()).Do()
 }
 
+// delete deletes GKE cluster and waits until completion
+func (gsc *GKESDKClient) delete(project, clusterName, location string) (*container.Operation, error) {
+	parent := fmt.Sprintf("projects/%s/locations/%s/clusters/%s", project, location, clusterName)
+	return gsc.Projects.Locations.Clusters.Delete(parent).Context(context.Background()).Do()
+}
+
 func (gsc *GKESDKClient) get(project, location, cluster string) (*container.Cluster, error) {
 	clusterFullPath := fmt.Sprintf("projects/%s/locations/%s/clusters/%s", project, location, cluster)
 	return gsc.Projects.Locations.Clusters.Get(clusterFullPath).Context(context.Background()).Do()
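The SDK delete call above only starts a long-running Operation; completion is observed by polling. A sketch of such a poll loop, assumed to live inside package clustermanager (with "time" imported), further assuming getOperation takes project, location, and operation name as its three string arguments and that finished operations report Status "DONE"; GKECluster.wait below is the commit's actual, timeout-aware version:

	// waitForOp is illustrative only, not part of the commit.
	func waitForOp(ops GKESDKOperations, project, location, opName string) error {
		for {
			op, err := ops.getOperation(project, location, opName)
			if err != nil {
				return err
			}
			if op.Status == "DONE" {
				return nil
			}
			// Arbitrary poll interval for the sketch.
			time.Sleep(5 * time.Second)
		}
	}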
@@ -222,7 +230,10 @@ func (gc *GKECluster) Acquire() error {
 		}
 	}
 	var cluster *container.Cluster
+	var op *container.Operation
 	for i, region := range regions {
+		// Restore innocence
+		err = nil
 		rb := &container.CreateClusterRequest{
 			Cluster: &container.Cluster{
 				Name: clusterName,
@@ -235,24 +246,36 @@ func (gc *GKECluster) Acquire() error {
 		}
 
 		clusterLoc := getClusterLocation(region, gc.Request.Zone)
-		// TODO(chaodaiG): add deleting logic once cluster deletion logic is done
 
-		log.Printf("Creating cluster %q' in %q", clusterName, clusterLoc)
-		var createOp *container.Operation
-		createOp, err = gc.operations.create(*gc.Project, clusterLoc, rb)
+		// Deleting cluster if it already exists
+		existingCluster, _ := gc.operations.get(*gc.Project, clusterLoc, clusterName)
+		if nil != existingCluster {
+			log.Printf("Cluster %q already exists in %q. Deleting...", clusterName, clusterLoc)
+			op, err = gc.operations.delete(*gc.Project, clusterName, clusterLoc)
+			if nil == err {
+				err = gc.wait(clusterLoc, op.Name, deletionTimeout)
+			}
+		}
+		// Creating cluster only if previous step succeeded
 		if nil == err {
-			if err = gc.wait(clusterLoc, createOp.Name, creationTimeout); nil == err {
-				cluster, err = gc.operations.get(*gc.Project, clusterLoc, rb.Cluster.Name)
+			log.Printf("Creating cluster %q in %q", clusterName, clusterLoc)
+			op, err = gc.operations.create(*gc.Project, clusterLoc, rb)
+			if nil == err {
+				if err = gc.wait(clusterLoc, op.Name, creationTimeout); nil == err {
+					cluster, err = gc.operations.get(*gc.Project, clusterLoc, rb.Cluster.Name)
+				}
 			}
 		}
 		if nil != err {
-			errMsg := fmt.Sprintf("error creating cluster: '%v'", err)
+			errMsg := fmt.Sprintf("Error during cluster creation: '%v'. ", err)
 			if gc.NeedCleanup { // Delete half created cluster if it's user created
-				// TODO(chaodaiG): add this part when deletion logic is done
+				errMsg = fmt.Sprintf("%sDeleting cluster %q in %q in background...\n", errMsg, clusterName, clusterLoc)
+				go gc.operations.delete(*gc.Project, clusterName, clusterLoc)
 			}
 			// Retry another region if cluster creation failed.
 			// TODO(chaodaiG): catch specific errors as we know what the error look like for stockout etc.
 			if len(regions) != i+1 {
-				errMsg = fmt.Sprintf("%s. Retry another region '%s' for cluster creation", errMsg, regions[i+1])
+				errMsg = fmt.Sprintf("%sRetry another region %q for cluster creation", errMsg, regions[i+1])
 			}
 			log.Printf(errMsg)
 		} else {
@@ -265,6 +288,39 @@ func (gc *GKECluster) Acquire() error {
 	return err
 }
 
+// Delete takes care of GKE cluster resource cleanup. It only release Boskos resource if running in
+// Prow, otherwise deletes the cluster if marked NeedsCleanup
+func (gc *GKECluster) Delete() error {
+	gc.ensureProtected()
+	// Release Boskos if running in Prow, will let Janitor taking care of
+	// clusters deleting
+	if common.IsProw() {
+		log.Printf("Releasing Boskos resource: '%v'", *gc.Project)
+		return gc.boskosOps.ReleaseGKEProject(nil, *gc.Project)
+	}
+
+	// NeedCleanup is only true if running locally and cluster created by the
+	// process
+	if !gc.NeedCleanup {
+		return nil
+	}
+	// Should only get here if running locally and cluster created by this
+	// client, so at this moment cluster should have been set
+	if nil == gc.Cluster {
+		return fmt.Errorf("cluster doesn't exist")
+	}
+
+	log.Printf("Deleting cluster %q in %q", gc.Cluster.Name, gc.Cluster.Location)
+	op, err := gc.operations.delete(*gc.Project, gc.Cluster.Name, gc.Cluster.Location)
+	if nil == err {
+		err = gc.wait(gc.Cluster.Location, op.Name, deletionTimeout)
+	}
+	if nil != err {
+		return fmt.Errorf("failed deleting cluster: '%v'", err)
+	}
+	return nil
+}
+
 // wait depends on unique opName(operation ID created by cloud), and waits until
 // it's done
 func (gc *GKECluster) wait(location, opName string, wait time.Duration) error {
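In practice the new method is meant as an end-of-test cleanup step. A short usage sketch, assuming gc is a *GKECluster whose Acquire has succeeded:

	defer func() {
		// In Prow this only releases the Boskos project; locally it
		// deletes the cluster, and only when NeedCleanup is true.
		if err := gc.Delete(); err != nil {
			log.Printf("failed cleaning up cluster: %v", err)
		}
	}()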
@@ -17,16 +17,19 @@ limitations under the License.
 package clustermanager
 
 import (
+	"errors"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"reflect"
+	"strconv"
 	"strings"
 	"testing"
 	"time"
 
 	"google.golang.org/api/container/v1"
 
 	boskoscommon "k8s.io/test-infra/boskos/common"
+	boskosFake "knative.dev/pkg/testutils/clustermanager/boskos/fake"
 	"knative.dev/pkg/testutils/common"
 )
@@ -67,7 +70,7 @@ func newFakeGKESDKClient() *FakeGKESDKClient {
 // fgsc.opStatus by fgsc.opStatus[string(fgsc.opNumber+1)]="PENDING" to make the
 // next operation pending
 func (fgsc *FakeGKESDKClient) newOp() *container.Operation {
-	opName := string(fgsc.opNumber)
+	opName := strconv.Itoa(fgsc.opNumber)
 	op := &container.Operation{
 		Name:   opName,
 		Status: "DONE",
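Because newOp now names operations "0", "1", ... via strconv.Itoa, a test can pre-seed opStatus to force the outcome of the Nth operation. A sketch of the pattern TestAcquire below uses (setupFakeGKECluster is the existing test helper):

	fgc := setupFakeGKECluster()
	// Force the first operation to stay PENDING so the caller times out
	// and falls back to the next backup region.
	fgc.operations.(*FakeGKESDKClient).opStatus[strconv.Itoa(0)] = "PENDING"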
@@ -87,7 +90,7 @@ func (fgsc *FakeGKESDKClient) create(project, location string, rb *container.Cre
 	if cls, ok := fgsc.clusters[parent]; ok {
 		for _, cl := range cls {
 			if cl.Name == name {
-				return nil, fmt.Errorf("cluster already exist")
+				return nil, errors.New("cluster already exist")
 			}
 		}
 	} else {
@@ -103,6 +106,24 @@ func (fgsc *FakeGKESDKClient) create(project, location string, rb *container.Cre
 	return fgsc.newOp(), nil
 }
 
+func (fgsc *FakeGKESDKClient) delete(project, clusterName, location string) (*container.Operation, error) {
+	parent := fmt.Sprintf("projects/%s/locations/%s", project, location)
+	found := -1
+	if clusters, ok := fgsc.clusters[parent]; ok {
+		for i, cluster := range clusters {
+			if cluster.Name == clusterName {
+				found = i
+			}
+		}
+	}
+	if found == -1 {
+		return nil, fmt.Errorf("cluster %q not found for deletion", clusterName)
+	}
+	// Delete this cluster
+	fgsc.clusters[parent] = append(fgsc.clusters[parent][:found], fgsc.clusters[parent][found+1:]...)
+	return fgsc.newOp(), nil
+}
+
 func (fgsc *FakeGKESDKClient) get(project, location, cluster string) (*container.Cluster, error) {
 	parent := fmt.Sprintf("projects/%s/locations/%s", project, location)
 	if cls, ok := fgsc.clusters[parent]; ok {
@@ -478,6 +499,7 @@ func TestAcquire(t *testing.T) {
 	fakeBuildID := "1234"
 	datas := []struct {
 		existCluster       *container.Cluster
+		kubeconfigSet      bool
 		nextOpStatus       []string
 		expClusterName     string
 		expClusterLocation string
@@ -488,20 +510,33 @@ func TestAcquire(t *testing.T) {
 			&container.Cluster{
 				Name:     "customcluster",
 				Location: "us-central1",
-			}, []string{}, "customcluster", "us-central1", nil,
+			}, true, []string{}, "customcluster", "us-central1", nil,
+		}, {
+			// cluster exists but not set in kubeconfig, cluster will be deleted
+			// then created
+			&container.Cluster{
+				Name:     fakeClusterName,
+				Location: "us-central1",
+			}, false, []string{}, fakeClusterName, "us-central1", nil,
+		}, {
+			// cluster exists but not set in kubeconfig, cluster deletion
+			// failed, will recreate in us-west1
+			&container.Cluster{
+				Name:     fakeClusterName,
+				Location: "us-central1",
+			}, false, []string{"BAD"}, fakeClusterName, "us-west1", nil,
 		}, {
 			// cluster creation succeeded
-			nil, []string{}, fakeClusterName, "us-central1", nil,
+			nil, false, []string{}, fakeClusterName, "us-central1", nil,
 		}, {
 			// cluster creation succeeded retry
-			nil, []string{"PENDING"}, fakeClusterName, "us-west1", nil,
+			nil, false, []string{"PENDING"}, fakeClusterName, "us-west1", nil,
 		}, {
 			// cluster creation failed all retry
-			nil, []string{"PENDING", "PENDING", "PENDING"},
-			"", "", fmt.Errorf("timed out waiting"),
+			nil, false, []string{"PENDING", "PENDING", "PENDING"}, "", "", fmt.Errorf("timed out waiting"),
 		}, {
 			// cluster creation went bad state
-			nil, []string{"BAD", "BAD", "BAD"}, "", "", fmt.Errorf("unexpected operation status: %q", "BAD"),
+			nil, false, []string{"BAD", "BAD", "BAD"}, "", "", fmt.Errorf("unexpected operation status: %q", "BAD"),
 		},
 	}
 
@@ -528,12 +563,22 @@ func TestAcquire(t *testing.T) {
 			return oldFunc(key)
 		}
 		fgc := setupFakeGKECluster()
+		opCount := 0
 		if nil != data.existCluster {
-			fgc.Cluster = data.existCluster
+			opCount++
+			fgc.operations.create(fakeProj, data.existCluster.Location, &container.CreateClusterRequest{
+				Cluster: &container.Cluster{
+					Name: data.existCluster.Name,
+				},
+				ProjectId: fakeProj,
+			})
+			if data.kubeconfigSet {
+				fgc.Cluster = data.existCluster
+			}
 		}
 		fgc.Project = &fakeProj
 		for i, status := range data.nextOpStatus {
-			fgc.operations.(*FakeGKESDKClient).opStatus[string(i)] = status
+			fgc.operations.(*FakeGKESDKClient).opStatus[strconv.Itoa(opCount+i)] = status
 		}
 
 		fgc.Request = &GKERequest{
@@ -543,6 +588,9 @@ func TestAcquire(t *testing.T) {
 		Zone:          "",
 		BackupRegions: DefaultGKEBackupRegions,
 	}
+	// Set NeedCleanup to false for easier testing, as it launches a
+	// goroutine
+	fgc.NeedCleanup = false
 	err := fgc.Acquire()
 	var gotName, gotLocation string
 	if nil != fgc.Cluster {
@@ -555,3 +603,129 @@ func TestAcquire(t *testing.T) {
 		}
 	}
 }
+
+func TestDelete(t *testing.T) {
+	datas := []struct {
+		isProw      bool
+		needCleanup bool
+		boskosState []*boskoscommon.Resource
+		cluster     *container.Cluster
+		expBoskos   []*boskoscommon.Resource
+		expCluster  *container.Cluster
+		expErr      error
+	}{
+		{
+			// Not in prow, NeedCleanup is false
+			false,
+			false,
+			[]*boskoscommon.Resource{},
+			&container.Cluster{
+				Name:     "customcluster",
+				Location: "us-central1",
+			},
+			nil,
+			&container.Cluster{
+				Name:     "customcluster",
+				Location: "us-central1",
+				Status:   "RUNNING",
+			},
+			nil,
+		}, {
+			// Not in prow, NeedCleanup is true
+			false,
+			true,
+			[]*boskoscommon.Resource{},
+			&container.Cluster{
+				Name:     "customcluster",
+				Location: "us-central1",
+			},
+			nil,
+			nil,
+			nil,
+		}, {
+			// Not in prow, NeedCleanup is true, but cluster doesn't exist
+			false,
+			true,
+			[]*boskoscommon.Resource{},
+			nil,
+			nil,
+			nil,
+			fmt.Errorf("cluster doesn't exist"),
+		}, {
+			// In prow, only need to release boskos
+			true,
+			true,
+			[]*boskoscommon.Resource{&boskoscommon.Resource{
+				Name: fakeProj,
+			}},
+			&container.Cluster{
+				Name:     "customcluster",
+				Location: "us-central1",
+			},
+			[]*boskoscommon.Resource{&boskoscommon.Resource{
+				Type:  "gke-project",
+				Name:  fakeProj,
+				State: boskoscommon.Free,
+			}},
+			&container.Cluster{
+				Name:     "customcluster",
+				Location: "us-central1",
+				Status:   "RUNNING",
+			},
+			nil,
+		},
+	}
+
+	// mock GetOSEnv for testing
+	oldFunc := common.GetOSEnv
+	// mock timeout so it doesn't run forever
+	oldTimeout := creationTimeout
+	creationTimeout = 100 * time.Millisecond
+	defer func() {
+		// restore
+		common.GetOSEnv = oldFunc
+		creationTimeout = oldTimeout
+	}()
+
+	for _, data := range datas {
+		common.GetOSEnv = func(key string) string {
+			switch key {
+			case "PROW_JOB_ID": // needed to mock IsProw()
+				if data.isProw {
+					return "fake_job_id"
+				}
+				return ""
+			}
+			return oldFunc(key)
+		}
+		fgc := setupFakeGKECluster()
+		fgc.Project = &fakeProj
+		fgc.NeedCleanup = data.needCleanup
+		if nil != data.cluster {
+			fgc.operations.create(fakeProj, data.cluster.Location, &container.CreateClusterRequest{
+				Cluster: &container.Cluster{
+					Name: data.cluster.Name,
+				},
+				ProjectId: fakeProj,
+			})
+			fgc.Cluster = data.cluster
+		}
+		// Set up fake boskos
+		for _, bos := range data.boskosState {
+			fgc.boskosOps.(*boskosFake.FakeBoskosClient).NewGKEProject(bos.Name)
+			// Acquire with default user
+			fgc.boskosOps.(*boskosFake.FakeBoskosClient).AcquireGKEProject(nil)
+		}
+
+		err := fgc.Delete()
+		var clusterGot *container.Cluster
+		if nil != data.cluster {
+			clusterGot, _ = fgc.operations.get(fakeProj, data.cluster.Location, data.cluster.Name)
+		}
+		gotBoskos := fgc.boskosOps.(*boskosFake.FakeBoskosClient).GetResources()
+		if !reflect.DeepEqual(err, data.expErr) || !reflect.DeepEqual(clusterGot, data.expCluster) || !reflect.DeepEqual(gotBoskos, data.expBoskos) {
+			t.Errorf("testing deleting cluster, with:\n\tIs Prow: '%v'\n\texisting cluster: '%v'\n\tboskos state: '%v'\nwant: boskos - '%v', cluster - '%v', err - '%v'\ngot: boskos - '%v', cluster - '%v', err - '%v'",
+				data.isProw, data.cluster, data.boskosState, data.expBoskos, data.expCluster, data.expErr, nil, clusterGot, err)
+		}
+	}
+}
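The new TestDelete runs with the package's other tests via the standard Go tooling, e.g.:

	go test knative.dev/pkg/testutils/clustermanager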