From 4a2fe15be71a3fbe42f9d40bafd788304ed51250 Mon Sep 17 00:00:00 2001 From: "Dr. Stefan Schimanski" Date: Thu, 2 Feb 2017 10:08:41 +0100 Subject: [PATCH] Mechanical move: pkg/storage -> k8s.io/apiserver --- pkg/storage/errors/BUILD | 18 + pkg/storage/errors/doc.go | 18 + pkg/storage/errors/storage.go | 108 ++++ pkg/storage/etcd/OWNERS | 24 + pkg/storage/etcd/api_object_versioner.go | 98 +++ pkg/storage/etcd/api_object_versioner_test.go | 58 ++ pkg/storage/etcd/doc.go | 17 + pkg/storage/etcd/etcd_helper.go | 610 ++++++++++++++++++ pkg/storage/etcd/etcd_helper_test.go | 567 ++++++++++++++++ pkg/storage/etcd/etcd_watcher.go | 487 ++++++++++++++ pkg/storage/etcd/etcd_watcher_test.go | 562 ++++++++++++++++ pkg/storage/etcd/etcdtest/doc.go | 17 + pkg/storage/etcd/etcdtest/etcdtest.go | 39 ++ .../etcd/testing/testingcert/certificates.go | 113 ++++ pkg/storage/etcd/testing/utils.go | 327 ++++++++++ pkg/storage/etcd/util/doc.go | 19 + pkg/storage/etcd/util/etcd_util.go | 99 +++ pkg/storage/etcd/util/etcd_util_test.go | 120 ++++ pkg/storage/etcd3/OWNERS | 5 + pkg/storage/etcd3/compact.go | 161 +++++ pkg/storage/etcd3/compact_test.go | 87 +++ pkg/storage/etcd3/event.go | 57 ++ pkg/storage/etcd3/store.go | 513 +++++++++++++++ pkg/storage/etcd3/store_test.go | 571 ++++++++++++++++ pkg/storage/etcd3/watcher.go | 375 +++++++++++ pkg/storage/etcd3/watcher_test.go | 375 +++++++++++ pkg/storage/storagebackend/factory/etcd2.go | 81 +++ pkg/storage/storagebackend/factory/etcd3.go | 62 ++ pkg/storage/storagebackend/factory/factory.go | 43 ++ .../storagebackend/factory/tls_test.go | 106 +++ pkg/storage/testing/OWNERS | 9 + pkg/storage/testing/types.generated.go | 423 ++++++++++++ pkg/storage/testing/types.go | 30 + pkg/storage/testing/utils.go | 61 ++ pkg/storage/tests/cacher_test.go | 579 +++++++++++++++++ pkg/storage/tests/utils.go | 32 + 36 files changed, 6871 insertions(+) create mode 100644 pkg/storage/errors/BUILD create mode 100644 pkg/storage/errors/doc.go create mode 100644 
pkg/storage/errors/storage.go create mode 100755 pkg/storage/etcd/OWNERS create mode 100644 pkg/storage/etcd/api_object_versioner.go create mode 100644 pkg/storage/etcd/api_object_versioner_test.go create mode 100644 pkg/storage/etcd/doc.go create mode 100644 pkg/storage/etcd/etcd_helper.go create mode 100644 pkg/storage/etcd/etcd_helper_test.go create mode 100644 pkg/storage/etcd/etcd_watcher.go create mode 100644 pkg/storage/etcd/etcd_watcher_test.go create mode 100644 pkg/storage/etcd/etcdtest/doc.go create mode 100644 pkg/storage/etcd/etcdtest/etcdtest.go create mode 100644 pkg/storage/etcd/testing/testingcert/certificates.go create mode 100644 pkg/storage/etcd/testing/utils.go create mode 100644 pkg/storage/etcd/util/doc.go create mode 100644 pkg/storage/etcd/util/etcd_util.go create mode 100644 pkg/storage/etcd/util/etcd_util_test.go create mode 100755 pkg/storage/etcd3/OWNERS create mode 100644 pkg/storage/etcd3/compact.go create mode 100644 pkg/storage/etcd3/compact_test.go create mode 100644 pkg/storage/etcd3/event.go create mode 100644 pkg/storage/etcd3/store.go create mode 100644 pkg/storage/etcd3/store_test.go create mode 100644 pkg/storage/etcd3/watcher.go create mode 100644 pkg/storage/etcd3/watcher_test.go create mode 100644 pkg/storage/storagebackend/factory/etcd2.go create mode 100644 pkg/storage/storagebackend/factory/etcd3.go create mode 100644 pkg/storage/storagebackend/factory/factory.go create mode 100644 pkg/storage/storagebackend/factory/tls_test.go create mode 100755 pkg/storage/testing/OWNERS create mode 100644 pkg/storage/testing/types.generated.go create mode 100644 pkg/storage/testing/types.go create mode 100644 pkg/storage/testing/utils.go create mode 100644 pkg/storage/tests/cacher_test.go create mode 100644 pkg/storage/tests/utils.go diff --git a/pkg/storage/errors/BUILD b/pkg/storage/errors/BUILD new file mode 100644 index 000000000..a7403fd47 --- /dev/null +++ b/pkg/storage/errors/BUILD @@ -0,0 +1,18 @@ +package(default_visibility 
= ["//visibility:public"]) + +licenses(["notice"]) + +load("@io_bazel_rules_go//go:def.bzl") + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], +) diff --git a/pkg/storage/errors/doc.go b/pkg/storage/errors/doc.go new file mode 100644 index 000000000..3d3150c56 --- /dev/null +++ b/pkg/storage/errors/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package storage provides conversion of storage errors to API errors. +package storage // import "k8s.io/apiserver/pkg/storage/errors" diff --git a/pkg/storage/errors/storage.go b/pkg/storage/errors/storage.go new file mode 100644 index 000000000..84f8d8577 --- /dev/null +++ b/pkg/storage/errors/storage.go @@ -0,0 +1,108 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package storage + +import ( + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apiserver/pkg/storage" +) + +// InterpretListError converts a generic error on a retrieval +// operation into the appropriate API error. +func InterpretListError(err error, qualifiedResource schema.GroupResource) error { + switch { + case storage.IsNotFound(err): + return errors.NewNotFound(qualifiedResource, "") + case storage.IsUnreachable(err): + return errors.NewServerTimeout(qualifiedResource, "list", 2) // TODO: make configurable or handled at a higher level + default: + return err + } +} + +// InterpretGetError converts a generic error on a retrieval +// operation into the appropriate API error. +func InterpretGetError(err error, qualifiedResource schema.GroupResource, name string) error { + switch { + case storage.IsNotFound(err): + return errors.NewNotFound(qualifiedResource, name) + case storage.IsUnreachable(err): + return errors.NewServerTimeout(qualifiedResource, "get", 2) // TODO: make configurable or handled at a higher level + default: + return err + } +} + +// InterpretCreateError converts a generic error on a create +// operation into the appropriate API error. +func InterpretCreateError(err error, qualifiedResource schema.GroupResource, name string) error { + switch { + case storage.IsNodeExist(err): + return errors.NewAlreadyExists(qualifiedResource, name) + case storage.IsUnreachable(err): + return errors.NewServerTimeout(qualifiedResource, "create", 2) // TODO: make configurable or handled at a higher level + default: + return err + } +} + +// InterpretUpdateError converts a generic error on an update +// operation into the appropriate API error. 
+func InterpretUpdateError(err error, qualifiedResource schema.GroupResource, name string) error {
+	// The checks run in a fixed order; the first predicate that matches wins.
+	if storage.IsConflict(err) || storage.IsNodeExist(err) || storage.IsInvalidObj(err) {
+		return errors.NewConflict(qualifiedResource, name, err)
+	}
+	if storage.IsUnreachable(err) {
+		// TODO: make configurable or handled at a higher level
+		return errors.NewServerTimeout(qualifiedResource, "update", 2)
+	}
+	if storage.IsNotFound(err) {
+		return errors.NewNotFound(qualifiedResource, name)
+	}
+	if storage.IsInternalError(err) {
+		return errors.NewInternalError(err)
+	}
+	return err
+}
+
+// InterpretDeleteError translates an error returned by a delete operation on
+// the named object into the matching API error. Not-found maps to NewNotFound,
+// unreachable to NewServerTimeout for "delete", conflict / node-exists /
+// invalid-object to NewConflict, and internal errors to NewInternalError.
+// Any other error is handed back unchanged.
+func InterpretDeleteError(err error, qualifiedResource schema.GroupResource, name string) error {
+	// The checks run in a fixed order; the first predicate that matches wins.
+	if storage.IsNotFound(err) {
+		return errors.NewNotFound(qualifiedResource, name)
+	}
+	if storage.IsUnreachable(err) {
+		// TODO: make configurable or handled at a higher level
+		return errors.NewServerTimeout(qualifiedResource, "delete", 2)
+	}
+	if storage.IsConflict(err) || storage.IsNodeExist(err) || storage.IsInvalidObj(err) {
+		return errors.NewConflict(qualifiedResource, name, err)
+	}
+	if storage.IsInternalError(err) {
+		return errors.NewInternalError(err)
+	}
+	return err
+}
+
+// InterpretWatchError converts a generic error on a watch
+// operation into the appropriate API error.
+func InterpretWatchError(err error, resource schema.GroupResource, name string) error { + switch { + case storage.IsInvalidError(err): + invalidError, _ := err.(storage.InvalidError) + return errors.NewInvalid(schema.GroupKind{Group: resource.Group, Kind: resource.Resource}, name, invalidError.Errs) + default: + return err + } +} diff --git a/pkg/storage/etcd/OWNERS b/pkg/storage/etcd/OWNERS new file mode 100755 index 000000000..ac5dfab00 --- /dev/null +++ b/pkg/storage/etcd/OWNERS @@ -0,0 +1,24 @@ +reviewers: +- lavalamp +- smarterclayton +- wojtek-t +- deads2k +- derekwaynecarr +- caesarxuchao +- mikedanese +- liggitt +- davidopp +- pmorie +- luxas +- janetkuo +- roberthbailey +- timstclair +- timothysc +- dims +- hongchaodeng +- krousey +- fgrzadkowski +- resouer +- pweil- +- mqliang +- feihujiang diff --git a/pkg/storage/etcd/api_object_versioner.go b/pkg/storage/etcd/api_object_versioner.go new file mode 100644 index 000000000..1480ad4cc --- /dev/null +++ b/pkg/storage/etcd/api_object_versioner.go @@ -0,0 +1,98 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package etcd + +import ( + "strconv" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apiserver/pkg/storage" +) + +// APIObjectVersioner implements versioning and extracting etcd node information +// for objects that have an embedded ObjectMeta or ListMeta field. 
+type APIObjectVersioner struct{} + +// UpdateObject implements Versioner +func (a APIObjectVersioner) UpdateObject(obj runtime.Object, resourceVersion uint64) error { + accessor, err := meta.Accessor(obj) + if err != nil { + return err + } + versionString := "" + if resourceVersion != 0 { + versionString = strconv.FormatUint(resourceVersion, 10) + } + accessor.SetResourceVersion(versionString) + return nil +} + +// UpdateList implements Versioner +func (a APIObjectVersioner) UpdateList(obj runtime.Object, resourceVersion uint64) error { + listMeta, err := metav1.ListMetaFor(obj) + if err != nil || listMeta == nil { + return err + } + versionString := "" + if resourceVersion != 0 { + versionString = strconv.FormatUint(resourceVersion, 10) + } + listMeta.ResourceVersion = versionString + return nil +} + +// ObjectResourceVersion implements Versioner +func (a APIObjectVersioner) ObjectResourceVersion(obj runtime.Object) (uint64, error) { + accessor, err := meta.Accessor(obj) + if err != nil { + return 0, err + } + version := accessor.GetResourceVersion() + if len(version) == 0 { + return 0, nil + } + return strconv.ParseUint(version, 10, 64) +} + +// APIObjectVersioner implements Versioner +var Versioner storage.Versioner = APIObjectVersioner{} + +// CompareResourceVersion compares etcd resource versions. Outside this API they are all strings, +// but etcd resource versions are special, they're actually ints, so we can easily compare them. 
+func (a APIObjectVersioner) CompareResourceVersion(lhs, rhs runtime.Object) int { + lhsVersion, err := Versioner.ObjectResourceVersion(lhs) + if err != nil { + // coder error + panic(err) + } + rhsVersion, err := Versioner.ObjectResourceVersion(rhs) + if err != nil { + // coder error + panic(err) + } + + if lhsVersion == rhsVersion { + return 0 + } + if lhsVersion < rhsVersion { + return -1 + } + + return 1 +} diff --git a/pkg/storage/etcd/api_object_versioner_test.go b/pkg/storage/etcd/api_object_versioner_test.go new file mode 100644 index 000000000..acbce6243 --- /dev/null +++ b/pkg/storage/etcd/api_object_versioner_test.go @@ -0,0 +1,58 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package etcd + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + storagetesting "k8s.io/kubernetes/pkg/storage/testing" +) + +func TestObjectVersioner(t *testing.T) { + v := APIObjectVersioner{} + if ver, err := v.ObjectResourceVersion(&storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{ResourceVersion: "5"}}); err != nil || ver != 5 { + t.Errorf("unexpected version: %d %v", ver, err) + } + if ver, err := v.ObjectResourceVersion(&storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{ResourceVersion: "a"}}); err == nil || ver != 0 { + t.Errorf("unexpected version: %d %v", ver, err) + } + obj := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{ResourceVersion: "a"}} + if err := v.UpdateObject(obj, 5); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if obj.ResourceVersion != "5" || obj.DeletionTimestamp != nil { + t.Errorf("unexpected resource version: %#v", obj) + } +} + +func TestCompareResourceVersion(t *testing.T) { + five := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{ResourceVersion: "5"}} + six := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{ResourceVersion: "6"}} + + versioner := APIObjectVersioner{} + + if e, a := -1, versioner.CompareResourceVersion(five, six); e != a { + t.Errorf("expected %v got %v", e, a) + } + if e, a := 1, versioner.CompareResourceVersion(six, five); e != a { + t.Errorf("expected %v got %v", e, a) + } + if e, a := 0, versioner.CompareResourceVersion(six, six); e != a { + t.Errorf("expected %v got %v", e, a) + } +} diff --git a/pkg/storage/etcd/doc.go b/pkg/storage/etcd/doc.go new file mode 100644 index 000000000..22b269d5a --- /dev/null +++ b/pkg/storage/etcd/doc.go @@ -0,0 +1,17 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package etcd // import "k8s.io/kubernetes/pkg/storage/etcd" diff --git a/pkg/storage/etcd/etcd_helper.go b/pkg/storage/etcd/etcd_helper.go new file mode 100644 index 000000000..08944b0c0 --- /dev/null +++ b/pkg/storage/etcd/etcd_helper.go @@ -0,0 +1,610 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package etcd
+
+import (
+	"errors"
+	"fmt"
+	"path"
+	"reflect"
+	"time"
+
+	etcd "github.com/coreos/etcd/client"
+	"github.com/golang/glog"
+	"golang.org/x/net/context"
+
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/conversion"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/apiserver/pkg/storage"
+	"k8s.io/apiserver/pkg/storage/etcd/metrics"
+	utilcache "k8s.io/apiserver/pkg/util/cache"
+	utiltrace "k8s.io/apiserver/pkg/util/trace"
+	etcdutil "k8s.io/kubernetes/pkg/storage/etcd/util"
+)
+
+// NewEtcdStorage creates a new storage interface from the client. Keys are
+// rooted at "/"+prefix, reads use quorum when quorum is true, and decoded
+// objects are cached in a cache created with cacheSize.
+// TODO: deprecate in favor of storage.Config abstraction over time
+func NewEtcdStorage(client etcd.Client, codec runtime.Codec, prefix string, quorum bool, cacheSize int, copier runtime.ObjectCopier) storage.Interface {
+	return &etcdHelper{
+		etcdMembersAPI: etcd.NewMembersAPI(client),
+		etcdKeysAPI:    etcd.NewKeysAPI(client),
+		codec:          codec,
+		versioner:      APIObjectVersioner{},
+		copier:         copier,
+		pathPrefix:     path.Join("/", prefix),
+		quorum:         quorum,
+		cache:          utilcache.NewCache(cacheSize),
+	}
+}
+
+// etcdHelper is the reference implementation of storage.Interface.
+type etcdHelper struct {
+	etcdMembersAPI etcd.MembersAPI
+	etcdKeysAPI    etcd.KeysAPI
+	codec          runtime.Codec
+	copier         runtime.ObjectCopier
+	// Note that versioner is required for etcdHelper to work correctly.
+	// The public constructors (NewStorage & NewEtcdStorage) are setting it
+	// correctly, so be careful when manipulating with it manually.
+	// optional, has to be set to perform any atomic operations
+	versioner storage.Versioner
+	// prefix for all etcd keys
+	pathPrefix string
+	// if true, perform quorum read
+	quorum bool
+
+	// We cache objects stored in etcd. For keys we use Node.ModifiedIndex which is equivalent
+	// to resourceVersion.
+	// This depends on etcd's indexes being globally unique across all objects/types. This will
+	// have to be revisited if we decide to do things like multiple etcd clusters, or etcd will
+	// support multi-object transaction that will result in many objects with the same index.
+	// Number of entries stored in the cache is controlled by the cacheSize argument passed to
+	// NewEtcdStorage.
+	// TODO: Measure how much this cache helps after the conversion code is optimized.
+	cache utilcache.Cache
+}
+
+func init() {
+	metrics.Register()
+}
+
+// Implements storage.Interface.
+func (h *etcdHelper) Versioner() storage.Versioner {
+	return h.versioner
+}
+
+// Implements storage.Interface.
+// Create encodes obj and stores it under key with PrevNoExist, so the call
+// fails if the key already exists. obj must not carry a resourceVersion.
+// When out is non-nil it must be a pointer and receives the stored object.
+func (h *etcdHelper) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
+	trace := utiltrace.New("etcdHelper::Create " + getTypeName(obj))
+	defer trace.LogIfLong(250 * time.Millisecond)
+	if ctx == nil {
+		glog.Errorf("Context is nil")
+	}
+	key = path.Join(h.pathPrefix, key)
+	data, err := runtime.Encode(h.codec, obj)
+	trace.Step("Object encoded")
+	if err != nil {
+		return err
+	}
+	if version, err := h.versioner.ObjectResourceVersion(obj); err == nil && version != 0 {
+		return errors.New("resourceVersion may not be set on objects to be created")
+	}
+	trace.Step("Version checked")
+
+	startTime := time.Now()
+	opts := etcd.SetOptions{
+		TTL:       time.Duration(ttl) * time.Second,
+		PrevExist: etcd.PrevNoExist,
+	}
+	response, err := h.etcdKeysAPI.Set(ctx, key, string(data), &opts)
+	trace.Step("Object created")
+	metrics.RecordEtcdRequestLatency("create", getTypeName(obj), startTime)
+	if err != nil {
+		return toStorageErr(err, key, 0)
+	}
+	if out != nil {
+		if _, err := conversion.EnforcePtr(out); err != nil {
+			panic("unable to convert output object to pointer")
+		}
+		_, _, err = h.extractObj(response, err, out, false, false)
+	}
+	return err
+}
+
+// checkPreconditions verifies that out satisfies preconditions. A nil
+// preconditions always passes. If the object's metadata cannot be read an
+// internal error is returned; if a UID precondition is set and differs from
+// the object's UID an invalid-object error for key is returned.
+func checkPreconditions(key string, preconditions *storage.Preconditions, out runtime.Object) error {
+	if preconditions == nil {
+		return nil
+	}
+	objMeta, err := metav1.ObjectMetaFor(out)
+	if err != nil {
+		return storage.NewInternalErrorf("can't enforce preconditions %v on un-introspectable object %v, got error: %v", *preconditions, out, err)
+	}
+	if preconditions.UID != nil && *preconditions.UID != objMeta.UID {
+		// Dereference the UID: formatting the pointer itself with %v would
+		// print its address rather than the UID value.
+		errMsg := fmt.Sprintf("Precondition failed: UID in precondition: %v, UID in object meta: %v", *preconditions.UID, objMeta.UID)
+		return storage.NewInvalidObjError(key, errMsg)
+	}
+	return nil
+}
+
+// Implements storage.Interface.
+// Delete removes key. Without preconditions it issues a single unconditional
+// delete. With preconditions it reads the object, checks it, and deletes
+// conditionally on the observed index, retrying when etcd reports a
+// compare failure. out must be a pointer; it is filled from the previous
+// node when etcd returns one.
+func (h *etcdHelper) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions) error {
+	if ctx == nil {
+		glog.Errorf("Context is nil")
+	}
+	key = path.Join(h.pathPrefix, key)
+	v, err := conversion.EnforcePtr(out)
+	if err != nil {
+		panic("unable to convert output object to pointer")
+	}
+
+	if preconditions == nil {
+		startTime := time.Now()
+		response, err := h.etcdKeysAPI.Delete(ctx, key, nil)
+		metrics.RecordEtcdRequestLatency("delete", getTypeName(out), startTime)
+		if !etcdutil.IsEtcdNotFound(err) {
+			// if the object that existed prior to the delete is returned by etcd, update the out object.
+			if err != nil || response.PrevNode != nil {
+				_, _, err = h.extractObj(response, err, out, false, true)
+			}
+		}
+		return toStorageErr(err, key, 0)
+	}
+
+	// Check the preconditions match.
+	obj := reflect.New(v.Type()).Interface().(runtime.Object)
+	for {
+		_, node, res, err := h.bodyAndExtractObj(ctx, key, obj, false)
+		if err != nil {
+			return toStorageErr(err, key, 0)
+		}
+		if err := checkPreconditions(key, preconditions, obj); err != nil {
+			return toStorageErr(err, key, 0)
+		}
+		index := uint64(0)
+		if node != nil {
+			index = node.ModifiedIndex
+		} else if res != nil {
+			index = res.Index
+		}
+		opt := etcd.DeleteOptions{PrevIndex: index}
+		startTime := time.Now()
+		response, err := h.etcdKeysAPI.Delete(ctx, key, &opt)
+		metrics.RecordEtcdRequestLatency("delete", getTypeName(out), startTime)
+		if etcdutil.IsEtcdTestFailed(err) {
+			glog.Infof("deletion of %s failed because of a conflict, going to retry", key)
+		} else {
+			if !etcdutil.IsEtcdNotFound(err) {
+				// if the object that existed prior to the delete is returned by etcd, update the out object.
+				if err != nil || response.PrevNode != nil {
+					_, _, err = h.extractObj(response, err, out, false, true)
+				}
+			}
+			return toStorageErr(err, key, 0)
+		}
+	}
+}
+
+// Implements storage.Interface.
+// Watch starts a watch on key beginning at resourceVersion and returns the
+// watcher; events are filtered by pred. The watch loop runs in its own
+// goroutine.
+func (h *etcdHelper) Watch(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
+	if ctx == nil {
+		glog.Errorf("Context is nil")
+	}
+	watchRV, err := storage.ParseWatchResourceVersion(resourceVersion)
+	if err != nil {
+		return nil, err
+	}
+	key = path.Join(h.pathPrefix, key)
+	w := newEtcdWatcher(false, h.quorum, nil, storage.SimpleFilter(pred), h.codec, h.versioner, nil, h)
+	go w.etcdWatch(ctx, h.etcdKeysAPI, key, watchRV)
+	return w, nil
+}
+
+// Implements storage.Interface.
+func (h *etcdHelper) WatchList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
+	if ctx == nil {
+		glog.Errorf("Context is nil")
+	}
+	startRV, parseErr := storage.ParseWatchResourceVersion(resourceVersion)
+	if parseErr != nil {
+		return nil, parseErr
+	}
+	fullKey := path.Join(h.pathPrefix, key)
+	// List watches are created with list=true and the exceptKey(fullKey)
+	// include function.
+	include := exceptKey(fullKey)
+	filter := storage.SimpleFilter(pred)
+	watcher := newEtcdWatcher(true, h.quorum, include, filter, h.codec, h.versioner, nil, h)
+	go watcher.etcdWatch(ctx, h.etcdKeysAPI, fullKey, startRV)
+	return watcher, nil
+}
+
+// Implements storage.Interface.
+// Get reads the object stored under key into objPtr. The resourceVersion
+// argument is not consulted by this implementation.
+func (h *etcdHelper) Get(ctx context.Context, key string, resourceVersion string, objPtr runtime.Object, ignoreNotFound bool) error {
+	if ctx == nil {
+		glog.Errorf("Context is nil")
+	}
+	_, _, _, err := h.bodyAndExtractObj(ctx, path.Join(h.pathPrefix, key), objPtr, ignoreNotFound)
+	return err
+}
+
+// bodyAndExtractObj performs the normal Get path to etcd, returning the parsed node and response for additional information
+// about the response, like the current etcd index and the ttl.
+func (h *etcdHelper) bodyAndExtractObj(ctx context.Context, key string, objPtr runtime.Object, ignoreNotFound bool) (body string, node *etcd.Node, res *etcd.Response, err error) {
+	if ctx == nil {
+		glog.Errorf("Context is nil")
+	}
+	startTime := time.Now()
+
+	opts := &etcd.GetOptions{
+		Quorum: h.quorum,
+	}
+
+	response, err := h.etcdKeysAPI.Get(ctx, key, opts)
+	metrics.RecordEtcdRequestLatency("get", getTypeName(objPtr), startTime)
+	// Any error other than "not found" aborts here; a not-found error is
+	// passed on to extractObj, which applies the ignoreNotFound policy.
+	if err != nil && !etcdutil.IsEtcdNotFound(err) {
+		return "", nil, nil, toStorageErr(err, key, 0)
+	}
+	body, node, err = h.extractObj(response, err, objPtr, ignoreNotFound, false)
+	return body, node, response, toStorageErr(err, key, 0)
+}
+
+// extractObj decodes the value of an etcd response node into objPtr and
+// returns the raw body together with the node it came from.
+//
+// prevNode selects response.PrevNode instead of response.Node. When inErr is
+// non-nil, or no node / no value is available, the outcome depends on
+// ignoreNotFound: if true, objPtr is reset to its zero value and no error is
+// returned; otherwise inErr is returned, or a "unable to locate a value"
+// error when inErr is nil. On success the object's resource version is set
+// from node.ModifiedIndex.
+func (h *etcdHelper) extractObj(response *etcd.Response, inErr error, objPtr runtime.Object, ignoreNotFound, prevNode bool) (body string, node *etcd.Node, err error) {
+	if response != nil {
+		if prevNode {
+			node = response.PrevNode
+		} else {
+			node = response.Node
+		}
+	}
+	if inErr != nil || node == nil || len(node.Value) == 0 {
+		if ignoreNotFound {
+			// Note: this err shadows the named result; it is only returned
+			// when objPtr is not a pointer.
+			v, err := conversion.EnforcePtr(objPtr)
+			if err != nil {
+				return "", nil, err
+			}
+			v.Set(reflect.Zero(v.Type()))
+			return "", nil, nil
+		} else if inErr != nil {
+			return "", nil, inErr
+		}
+		return "", nil, fmt.Errorf("unable to locate a value on the response: %#v", response)
+	}
+	body = node.Value
+	out, gvk, err := h.codec.Decode([]byte(body), nil, objPtr)
+	if err != nil {
+		return body, nil, err
+	}
+	// The codec must have decoded into the caller's object, not a new one.
+	if out != objPtr {
+		return body, nil, fmt.Errorf("unable to decode object %s into %v", gvk.String(), reflect.TypeOf(objPtr))
+	}
+	// being unable to set the version does not prevent the object from being extracted
+	_ = h.versioner.UpdateObject(objPtr, node.ModifiedIndex)
+	return body, node, err
+}
+
+// Implements storage.Interface.
+func (h *etcdHelper) GetToList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error { + if ctx == nil { + glog.Errorf("Context is nil") + } + trace := utiltrace.New("GetToList " + getTypeName(listObj)) + listPtr, err := meta.GetItemsPtr(listObj) + if err != nil { + return err + } + key = path.Join(h.pathPrefix, key) + startTime := time.Now() + trace.Step("About to read etcd node") + + opts := &etcd.GetOptions{ + Quorum: h.quorum, + } + response, err := h.etcdKeysAPI.Get(ctx, key, opts) + trace.Step("Etcd node read") + metrics.RecordEtcdRequestLatency("get", getTypeName(listPtr), startTime) + if err != nil { + if etcdutil.IsEtcdNotFound(err) { + return nil + } + return toStorageErr(err, key, 0) + } + + nodes := make([]*etcd.Node, 0) + nodes = append(nodes, response.Node) + + if err := h.decodeNodeList(nodes, storage.SimpleFilter(pred), listPtr); err != nil { + return err + } + trace.Step("Object decoded") + if err := h.versioner.UpdateList(listObj, response.Index); err != nil { + return err + } + return nil +} + +// decodeNodeList walks the tree of each node in the list and decodes into the specified object +func (h *etcdHelper) decodeNodeList(nodes []*etcd.Node, filter storage.FilterFunc, slicePtr interface{}) error { + trace := utiltrace.New("decodeNodeList " + getTypeName(slicePtr)) + defer trace.LogIfLong(400 * time.Millisecond) + v, err := conversion.EnforcePtr(slicePtr) + if err != nil || v.Kind() != reflect.Slice { + // This should not happen at runtime. + panic("need ptr to slice") + } + for _, node := range nodes { + if node.Dir { + trace.Step("Decoding dir " + node.Key + " START") + if err := h.decodeNodeList(node.Nodes, filter, slicePtr); err != nil { + return err + } + trace.Step("Decoding dir " + node.Key + " END") + continue + } + if obj, found := h.getFromCache(node.ModifiedIndex, filter); found { + // obj != nil iff it matches the filter function. 
+ if obj != nil { + v.Set(reflect.Append(v, reflect.ValueOf(obj).Elem())) + } + } else { + obj, _, err := h.codec.Decode([]byte(node.Value), nil, reflect.New(v.Type().Elem()).Interface().(runtime.Object)) + if err != nil { + return err + } + // being unable to set the version does not prevent the object from being extracted + _ = h.versioner.UpdateObject(obj, node.ModifiedIndex) + if filter(obj) { + v.Set(reflect.Append(v, reflect.ValueOf(obj).Elem())) + } + if node.ModifiedIndex != 0 { + h.addToCache(node.ModifiedIndex, obj) + } + } + } + trace.Step(fmt.Sprintf("Decoded %v nodes", len(nodes))) + return nil +} + +// Implements storage.Interface. +func (h *etcdHelper) List(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error { + if ctx == nil { + glog.Errorf("Context is nil") + } + trace := utiltrace.New("List " + getTypeName(listObj)) + defer trace.LogIfLong(400 * time.Millisecond) + listPtr, err := meta.GetItemsPtr(listObj) + if err != nil { + return err + } + key = path.Join(h.pathPrefix, key) + startTime := time.Now() + trace.Step("About to list etcd node") + nodes, index, err := h.listEtcdNode(ctx, key) + trace.Step("Etcd node listed") + metrics.RecordEtcdRequestLatency("list", getTypeName(listPtr), startTime) + if err != nil { + return err + } + if err := h.decodeNodeList(nodes, storage.SimpleFilter(pred), listPtr); err != nil { + return err + } + trace.Step("Node list decoded") + if err := h.versioner.UpdateList(listObj, index); err != nil { + return err + } + return nil +} + +func (h *etcdHelper) listEtcdNode(ctx context.Context, key string) ([]*etcd.Node, uint64, error) { + if ctx == nil { + glog.Errorf("Context is nil") + } + opts := etcd.GetOptions{ + Recursive: true, + Sort: true, + Quorum: h.quorum, + } + result, err := h.etcdKeysAPI.Get(ctx, key, &opts) + if err != nil { + var index uint64 + if etcdError, ok := err.(etcd.Error); ok { + index = etcdError.Index + } + nodes := 
make([]*etcd.Node, 0) + if etcdutil.IsEtcdNotFound(err) { + return nodes, index, nil + } else { + return nodes, index, toStorageErr(err, key, 0) + } + } + return result.Node.Nodes, result.Index, nil +} + +// Implements storage.Interface. +func (h *etcdHelper) GuaranteedUpdate( + ctx context.Context, key string, ptrToType runtime.Object, ignoreNotFound bool, + preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, _ ...runtime.Object) error { + // Ignore the suggestion about current object. + if ctx == nil { + glog.Errorf("Context is nil") + } + v, err := conversion.EnforcePtr(ptrToType) + if err != nil { + // Panic is appropriate, because this is a programming error. + panic("need ptr to type") + } + key = path.Join(h.pathPrefix, key) + for { + obj := reflect.New(v.Type()).Interface().(runtime.Object) + origBody, node, res, err := h.bodyAndExtractObj(ctx, key, obj, ignoreNotFound) + if err != nil { + return toStorageErr(err, key, 0) + } + if err := checkPreconditions(key, preconditions, obj); err != nil { + return toStorageErr(err, key, 0) + } + meta := storage.ResponseMeta{} + if node != nil { + meta.TTL = node.TTL + meta.ResourceVersion = node.ModifiedIndex + } + // Get the object to be written by calling tryUpdate. + ret, newTTL, err := tryUpdate(obj, meta) + if err != nil { + return toStorageErr(err, key, 0) + } + + index := uint64(0) + ttl := uint64(0) + if node != nil { + index = node.ModifiedIndex + if node.TTL != 0 { + ttl = uint64(node.TTL) + } + if node.Expiration != nil && ttl == 0 { + ttl = 1 + } + } else if res != nil { + index = res.Index + } + + if newTTL != nil { + if ttl != 0 && *newTTL == 0 { + // TODO: remove this after we have verified this is no longer an issue + glog.V(4).Infof("GuaranteedUpdate is clearing TTL for %q, may not be intentional", key) + } + ttl = *newTTL + } + + // Since update object may have a resourceVersion set, we need to clear it here. 
+ if err := h.versioner.UpdateObject(ret, 0); err != nil { + return errors.New("resourceVersion cannot be set on objects store in etcd") + } + + data, err := runtime.Encode(h.codec, ret) + if err != nil { + return err + } + + // First time this key has been used, try creating new value. + if index == 0 { + startTime := time.Now() + opts := etcd.SetOptions{ + TTL: time.Duration(ttl) * time.Second, + PrevExist: etcd.PrevNoExist, + } + response, err := h.etcdKeysAPI.Set(ctx, key, string(data), &opts) + metrics.RecordEtcdRequestLatency("create", getTypeName(ptrToType), startTime) + if etcdutil.IsEtcdNodeExist(err) { + continue + } + _, _, err = h.extractObj(response, err, ptrToType, false, false) + return toStorageErr(err, key, 0) + } + + if string(data) == origBody { + // If we don't send an update, we simply return the currently existing + // version of the object. + _, _, err := h.extractObj(res, nil, ptrToType, ignoreNotFound, false) + return err + } + + startTime := time.Now() + // Swap origBody with data, if origBody is the latest etcd data. + opts := etcd.SetOptions{ + PrevIndex: index, + TTL: time.Duration(ttl) * time.Second, + } + response, err := h.etcdKeysAPI.Set(ctx, key, string(data), &opts) + metrics.RecordEtcdRequestLatency("compareAndSwap", getTypeName(ptrToType), startTime) + if etcdutil.IsEtcdTestFailed(err) { + // Try again. + continue + } + _, _, err = h.extractObj(response, err, ptrToType, false, false) + return toStorageErr(err, key, int64(index)) + } +} + +// etcdCache defines interface used for caching objects stored in etcd. Objects are keyed by +// their Node.ModifiedIndex, which is unique across all types. +// All implementations must be thread-safe. 
+type etcdCache interface { + getFromCache(index uint64, filter storage.FilterFunc) (runtime.Object, bool) + addToCache(index uint64, obj runtime.Object) +} + +func getTypeName(obj interface{}) string { + return reflect.TypeOf(obj).String() +} + +func (h *etcdHelper) getFromCache(index uint64, filter storage.FilterFunc) (runtime.Object, bool) { + startTime := time.Now() + defer func() { + metrics.ObserveGetCache(startTime) + }() + obj, found := h.cache.Get(index) + if found { + if !filter(obj.(runtime.Object)) { + return nil, true + } + // We should not return the object itself to avoid polluting the cache if someone + // modifies returned values. + objCopy, err := h.copier.Copy(obj.(runtime.Object)) + if err != nil { + glog.Errorf("Error during DeepCopy of cached object: %q", err) + // We can't return a copy, thus we report the object as not found. + return nil, false + } + metrics.ObserveCacheHit() + return objCopy.(runtime.Object), true + } + metrics.ObserveCacheMiss() + return nil, false +} + +func (h *etcdHelper) addToCache(index uint64, obj runtime.Object) { + startTime := time.Now() + defer func() { + metrics.ObserveAddCache(startTime) + }() + objCopy, err := h.copier.Copy(obj) + if err != nil { + glog.Errorf("Error during DeepCopy of cached object: %q", err) + return + } + isOverwrite := h.cache.Add(index, objCopy) + if !isOverwrite { + metrics.ObserveNewEntry() + } +} + +func toStorageErr(err error, key string, rv int64) error { + if err == nil { + return nil + } + switch { + case etcdutil.IsEtcdNotFound(err): + return storage.NewKeyNotFoundError(key, rv) + case etcdutil.IsEtcdNodeExist(err): + return storage.NewKeyExistsError(key, rv) + case etcdutil.IsEtcdTestFailed(err): + return storage.NewResourceVersionConflictsError(key, rv) + case etcdutil.IsEtcdUnreachable(err): + return storage.NewUnreachableError(key, rv) + default: + return err + } +} diff --git a/pkg/storage/etcd/etcd_helper_test.go b/pkg/storage/etcd/etcd_helper_test.go new file mode 100644 
index 000000000..e2793f0ab --- /dev/null +++ b/pkg/storage/etcd/etcd_helper_test.go @@ -0,0 +1,567 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package etcd + +import ( + "path" + "reflect" + "sync" + "testing" + "time" + + etcd "github.com/coreos/etcd/client" + "golang.org/x/net/context" + apitesting "k8s.io/apimachinery/pkg/api/testing" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/conversion" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/apiserver/pkg/apis/example" + examplev1 "k8s.io/apiserver/pkg/apis/example/v1" + "k8s.io/apiserver/pkg/storage" + "k8s.io/kubernetes/pkg/storage/etcd/etcdtest" + etcdtesting "k8s.io/kubernetes/pkg/storage/etcd/testing" + storagetesting "k8s.io/kubernetes/pkg/storage/testing" + storagetests "k8s.io/kubernetes/pkg/storage/tests" +) + +func testScheme(t *testing.T) (*runtime.Scheme, serializer.CodecFactory) { + scheme := runtime.NewScheme() + scheme.Log(t) + scheme.AddKnownTypes(schema.GroupVersion{Version: runtime.APIVersionInternal}, &storagetesting.TestResource{}) + scheme.AddKnownTypes(schema.GroupVersion{Version: runtime.APIVersionInternal}, &storagetesting.TestResource{}) + example.AddToScheme(scheme) + examplev1.AddToScheme(scheme) + if err := scheme.AddConversionFuncs( + func(in 
*storagetesting.TestResource, out *storagetesting.TestResource, s conversion.Scope) error { + *out = *in + return nil + }, + func(in, out *time.Time, s conversion.Scope) error { + *out = *in + return nil + }, + ); err != nil { + panic(err) + } + codecs := serializer.NewCodecFactory(scheme) + return scheme, codecs +} + +func newEtcdHelper(client etcd.Client, scheme *runtime.Scheme, codec runtime.Codec, prefix string) etcdHelper { + return *NewEtcdStorage(client, codec, prefix, false, etcdtest.DeserializationCacheSize, scheme).(*etcdHelper) +} + +// Returns an encoded version of example.Pod with the given name. +func getEncodedPod(name string, codec runtime.Codec) string { + pod, _ := runtime.Encode(codec, &examplev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + }) + return string(pod) +} + +func createObj(t *testing.T, helper etcdHelper, name string, obj, out runtime.Object, ttl uint64) error { + err := helper.Create(context.TODO(), name, obj, out, ttl) + if err != nil { + t.Errorf("Unexpected error %v", err) + } + return err +} + +func createPodList(t *testing.T, helper etcdHelper, list *example.PodList) error { + for i := range list.Items { + returnedObj := &example.Pod{} + err := createObj(t, helper, list.Items[i].Name, &list.Items[i], returnedObj, 0) + if err != nil { + return err + } + list.Items[i] = *returnedObj + } + return nil +} + +func TestList(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + list := example.PodList{ + Items: []example.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "bar"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "baz"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + Spec: 
storagetests.DeepEqualSafePodSpec(), + }, + }, + } + + createPodList(t, helper, &list) + var got example.PodList + // TODO: a sorted filter function could be applied so that the implied + // ordering of the returned list doesn't matter. + err := helper.List(context.TODO(), "/", "", storage.Everything, &got) + if err != nil { + t.Errorf("Unexpected error %v", err) + } + + if e, a := list.Items, got.Items; !reflect.DeepEqual(e, a) { + t.Errorf("Expected %#v, got %#v", e, a) + } +} + +func TestListFiltered(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + list := example.PodList{ + Items: []example.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "bar"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "baz"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + }, + } + + createPodList(t, helper, &list) + // List only "bar" pod + p := storage.SelectionPredicate{ + Label: labels.Everything(), + Field: fields.SelectorFromSet(fields.Set{"metadata.name": "bar"}), + GetAttrs: func(obj runtime.Object) (labels.Set, fields.Set, error) { + pod := obj.(*example.Pod) + return labels.Set(pod.Labels), fields.Set{"metadata.name": pod.Name}, nil + }, + } + var got example.PodList + err := helper.List(context.TODO(), "/", "", p, &got) + if err != nil { + t.Errorf("Unexpected error %v", err) + } + // Check to make certain that the filter function only returns "bar" + if e, a := list.Items[0], got.Items[0]; !reflect.DeepEqual(e, a) { + t.Errorf("Expected %#v, got %#v", e, a) + } +} + +// TestListAcrossDirectories ensures that the client excludes directories and flattens tree-response - simulates cross-namespace query +func
TestListAcrossDirectories(t *testing.T) { + scheme, codecs := testScheme(t) + server := etcdtesting.NewEtcdTestClientServer(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + defer server.Terminate(t) + + roothelper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + helper1 := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()+"/dir1") + helper2 := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()+"/dir2") + + list := example.PodList{ + Items: []example.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "baz"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "bar"}, + Spec: storagetests.DeepEqualSafePodSpec(), + }, + }, + } + + returnedObj := &example.Pod{} + // create the 1st 2 elements in one directory + createObj(t, helper1, list.Items[0].Name, &list.Items[0], returnedObj, 0) + list.Items[0] = *returnedObj + createObj(t, helper1, list.Items[1].Name, &list.Items[1], returnedObj, 0) + list.Items[1] = *returnedObj + // create the last element in the other directory + createObj(t, helper2, list.Items[2].Name, &list.Items[2], returnedObj, 0) + list.Items[2] = *returnedObj + + var got example.PodList + err := roothelper.List(context.TODO(), "/", "", storage.Everything, &got) + if err != nil { + t.Errorf("Unexpected error %v", err) + } + if e, a := list.Items, got.Items; !reflect.DeepEqual(e, a) { + t.Errorf("Expected %#v, got %#v", e, a) + } +} + +func TestGet(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + key := "/some/key" + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + expect := example.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + Spec: 
storagetests.DeepEqualSafePodSpec(), + } + var got example.Pod + if err := helper.Create(context.TODO(), key, &expect, &got, 0); err != nil { + t.Errorf("Unexpected error %#v", err) + } + expect = got + if err := helper.Get(context.TODO(), key, "", &got, false); err != nil { + t.Errorf("Unexpected error %#v", err) + } + if !reflect.DeepEqual(got, expect) { + t.Errorf("Wanted %#v, got %#v", expect, got) + } +} + +func TestGetNotFoundErr(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, schema.GroupVersion{Version: "v1"}) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + boguskey := "/some/boguskey" + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + var got example.Pod + err := helper.Get(context.TODO(), boguskey, "", &got, false) + if !storage.IsNotFound(err) { + t.Errorf("Unexpected reponse on key=%v, err=%v", boguskey, err) + } +} + +func TestCreate(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + obj := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + returnedObj := &example.Pod{} + err := helper.Create(context.TODO(), "/some/key", obj, returnedObj, 5) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + _, err = runtime.Encode(codec, obj) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + err = helper.Get(context.TODO(), "/some/key", "", returnedObj, false) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + _, err = runtime.Encode(codec, returnedObj) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + if obj.Name != returnedObj.Name { + t.Errorf("Wanted %v, got %v", obj.Name, returnedObj.Name) + } +} + +func TestCreateNilOutParam(t *testing.T) { + scheme, codecs := 
testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + obj := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + err := helper.Create(context.TODO(), "/some/key", obj, nil, 5) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } +} + +func TestGuaranteedUpdate(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, schema.GroupVersion{Version: runtime.APIVersionInternal}) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + key := "/some/key" + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + obj := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Value: 1} + err := helper.GuaranteedUpdate(context.TODO(), key, &storagetesting.TestResource{}, true, nil, storage.SimpleUpdate(func(in runtime.Object) (runtime.Object, error) { + return obj, nil + })) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + + // Update an existing node. 
+ callbackCalled := false + objUpdate := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Value: 2} + err = helper.GuaranteedUpdate(context.TODO(), key, &storagetesting.TestResource{}, true, nil, storage.SimpleUpdate(func(in runtime.Object) (runtime.Object, error) { + callbackCalled = true + + if in.(*storagetesting.TestResource).Value != 1 { + t.Errorf("Callback input was not current set value") + } + + return objUpdate, nil + })) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + objCheck := &storagetesting.TestResource{} + err = helper.Get(context.TODO(), key, "", objCheck, false) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + if objCheck.Value != 2 { + t.Errorf("Value should have been 2 but got %v", objCheck.Value) + } + + if !callbackCalled { + t.Errorf("tryUpdate callback should have been called.") + } +} + +func TestGuaranteedUpdateNoChange(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, schema.GroupVersion{Version: runtime.APIVersionInternal}) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + key := "/some/key" + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + obj := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Value: 1} + err := helper.GuaranteedUpdate(context.TODO(), key, &storagetesting.TestResource{}, true, nil, storage.SimpleUpdate(func(in runtime.Object) (runtime.Object, error) { + return obj, nil + })) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + + // Update an existing node with the same data + callbackCalled := false + objUpdate := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Value: 1} + err = helper.GuaranteedUpdate(context.TODO(), key, &storagetesting.TestResource{}, true, nil, storage.SimpleUpdate(func(in runtime.Object) (runtime.Object, error) { + callbackCalled = true + return objUpdate, nil + })) + if err != 
nil { + t.Fatalf("Unexpected error %#v", err) + } + if !callbackCalled { + t.Errorf("tryUpdate callback should have been called.") + } +} + +func TestGuaranteedUpdateKeyNotFound(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, schema.GroupVersion{Version: runtime.APIVersionInternal}) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + key := "/some/key" + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + // Create a new node. + obj := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Value: 1} + + f := storage.SimpleUpdate(func(in runtime.Object) (runtime.Object, error) { + return obj, nil + }) + + ignoreNotFound := false + err := helper.GuaranteedUpdate(context.TODO(), key, &storagetesting.TestResource{}, ignoreNotFound, nil, f) + if err == nil { + t.Errorf("Expected error for key not found.") + } + + ignoreNotFound = true + err = helper.GuaranteedUpdate(context.TODO(), key, &storagetesting.TestResource{}, ignoreNotFound, nil, f) + if err != nil { + t.Errorf("Unexpected error %v.", err) + } +} + +func TestGuaranteedUpdate_CreateCollision(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, schema.GroupVersion{Version: runtime.APIVersionInternal}) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + key := "/some/key" + helper := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + const concurrency = 10 + var wgDone sync.WaitGroup + var wgForceCollision sync.WaitGroup + wgDone.Add(concurrency) + wgForceCollision.Add(concurrency) + + for i := 0; i < concurrency; i++ { + // Increment storagetesting.TestResource.Value by 1 + go func() { + defer wgDone.Done() + + firstCall := true + err := helper.GuaranteedUpdate(context.TODO(), key, &storagetesting.TestResource{}, true, nil, storage.SimpleUpdate(func(in runtime.Object) (runtime.Object, error) { + defer func() { 
firstCall = false }() + + if firstCall { + // Force collision by joining all concurrent GuaranteedUpdate operations here. + wgForceCollision.Done() + wgForceCollision.Wait() + } + + currValue := in.(*storagetesting.TestResource).Value + obj := &storagetesting.TestResource{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Value: currValue + 1} + return obj, nil + })) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + }() + } + wgDone.Wait() + + stored := &storagetesting.TestResource{} + err := helper.Get(context.TODO(), key, "", stored, false) + if err != nil { + t.Errorf("Unexpected error %#v", stored) + } + if stored.Value != concurrency { + t.Errorf("Some of the writes were lost. Stored value: %d", stored.Value) + } +} + +func TestGuaranteedUpdateUIDMismatch(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + prefix := path.Join("/", etcdtest.PathPrefix()) + helper := newEtcdHelper(server.Client, scheme, codec, prefix) + + obj := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: "A"}} + podPtr := &example.Pod{} + err := helper.Create(context.TODO(), "/some/key", obj, podPtr, 0) + if err != nil { + t.Fatalf("Unexpected error %#v", err) + } + err = helper.GuaranteedUpdate(context.TODO(), "/some/key", podPtr, true, storage.NewUIDPreconditions("B"), storage.SimpleUpdate(func(in runtime.Object) (runtime.Object, error) { + return obj, nil + })) + if !storage.IsInvalidObj(err) { + t.Fatalf("Expect a Test Failed (write conflict) error, got: %v", err) + } +} + +func TestDeleteUIDMismatch(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + prefix := path.Join("/", etcdtest.PathPrefix()) + helper := newEtcdHelper(server.Client, scheme, codec, prefix) + + obj := 
&example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: "A"}} + podPtr := &example.Pod{} + err := helper.Create(context.TODO(), "/some/key", obj, podPtr, 0) + if err != nil { + t.Fatalf("Unexpected error %#v", err) + } + err = helper.Delete(context.TODO(), "/some/key", obj, storage.NewUIDPreconditions("B")) + if !storage.IsInvalidObj(err) { + t.Fatalf("Expect a Test Failed (write conflict) error, got: %v", err) + } +} + +type getFunc func(ctx context.Context, key string, opts *etcd.GetOptions) (*etcd.Response, error) + +type fakeDeleteKeysAPI struct { + etcd.KeysAPI + fakeGetFunc getFunc + getCount int + // The fakeGetFunc will be called fakeGetCap-1 times before the KeysAPI's Get will be called. + fakeGetCap int +} + +func (f *fakeDeleteKeysAPI) Get(ctx context.Context, key string, opts *etcd.GetOptions) (*etcd.Response, error) { + f.getCount++ + if f.getCount < f.fakeGetCap { + return f.fakeGetFunc(ctx, key, opts) + } + return f.KeysAPI.Get(ctx, key, opts) +} + +// This is to emulate the case where another party updates the object when +// etcdHelper.Delete has verified the preconditions, but hasn't carried out the +// deletion yet. Etcd will fail the deletion and report the conflict. etcdHelper +// should retry until there is no conflict. +func TestDeleteWithRetry(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + prefix := path.Join("/", etcdtest.PathPrefix()) + + obj := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: "A"}} + // fakeGet returns a large ModifiedIndex to emulate the case that another + // party has updated the object.
+ fakeGet := func(ctx context.Context, key string, opts *etcd.GetOptions) (*etcd.Response, error) { + data, _ := runtime.Encode(codec, obj) + return &etcd.Response{Node: &etcd.Node{Value: string(data), ModifiedIndex: 99}}, nil + } + expectedRetries := 3 + helper := newEtcdHelper(server.Client, scheme, codec, prefix) + fake := &fakeDeleteKeysAPI{KeysAPI: helper.etcdKeysAPI, fakeGetCap: expectedRetries, fakeGetFunc: fakeGet} + helper.etcdKeysAPI = fake + + returnedObj := &example.Pod{} + err := helper.Create(context.TODO(), "/some/key", obj, returnedObj, 0) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + + err = helper.Delete(context.TODO(), "/some/key", obj, storage.NewUIDPreconditions("A")) + if err != nil { + t.Errorf("Unexpected error %#v", err) + } + if fake.getCount != expectedRetries { + t.Errorf("Expect %d retries, got %d", expectedRetries, fake.getCount) + } + err = helper.Get(context.TODO(), "/some/key", "", obj, false) + if !storage.IsNotFound(err) { + t.Errorf("Expect an NotFound error, got %v", err) + } +} diff --git a/pkg/storage/etcd/etcd_watcher.go b/pkg/storage/etcd/etcd_watcher.go new file mode 100644 index 000000000..280ecb657 --- /dev/null +++ b/pkg/storage/etcd/etcd_watcher.go @@ -0,0 +1,487 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package etcd + +import ( + "fmt" + "net/http" + "reflect" + "sync" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/apiserver/pkg/storage" + etcdutil "k8s.io/kubernetes/pkg/storage/etcd/util" + + etcd "github.com/coreos/etcd/client" + "github.com/golang/glog" + "golang.org/x/net/context" +) + +// Etcd watch event actions +const ( + EtcdCreate = "create" + EtcdGet = "get" + EtcdSet = "set" + EtcdCAS = "compareAndSwap" + EtcdDelete = "delete" + EtcdCAD = "compareAndDelete" + EtcdExpire = "expire" +) + +// TransformFunc attempts to convert an object to another object for use with a watcher. +type TransformFunc func(runtime.Object) (runtime.Object, error) + +// includeFunc returns true if the given key should be considered part of a watch +type includeFunc func(key string) bool + +// exceptKey is an includeFunc that returns false when the provided key matches the watched key +func exceptKey(except string) includeFunc { + return func(key string) bool { + return key != except + } +} + +// etcdWatcher converts a native etcd watch to a watch.Interface. +type etcdWatcher struct { + // HighWaterMarks for performance debugging. + // Important: Since HighWaterMark is using sync/atomic, it has to be at the top of the struct due to a bug on 32-bit platforms + // See: https://golang.org/pkg/sync/atomic/ for more information + incomingHWM storage.HighWaterMark + outgoingHWM storage.HighWaterMark + + encoding runtime.Codec + // Note that versioner is required for etcdWatcher to work correctly. + // There is no public constructor of it, so be careful when manipulating + // with it manually. + versioner storage.Versioner + transform TransformFunc + + list bool // If we're doing a recursive watch, should be true. 
+ quorum bool // If we enable quorum, should be true + include includeFunc + filter storage.FilterFunc + + etcdIncoming chan *etcd.Response + etcdError chan error + ctx context.Context + cancel context.CancelFunc + etcdCallEnded chan struct{} + + outgoing chan watch.Event + userStop chan struct{} + stopped bool + stopLock sync.Mutex + // wg is used to avoid calls to etcd after Stop(), and to make sure + // that the translate goroutine is not leaked. + wg sync.WaitGroup + + // Injectable for testing. Send the event down the outgoing channel. + emit func(watch.Event) + + cache etcdCache +} + +// watchWaitDuration is the amount of time to wait for an error from watch. +const watchWaitDuration = 100 * time.Millisecond + +// newEtcdWatcher returns a new etcdWatcher; if list is true, watch sub-nodes. +// The versioner must be able to handle the objects that transform creates. +func newEtcdWatcher( + list bool, quorum bool, include includeFunc, filter storage.FilterFunc, + encoding runtime.Codec, versioner storage.Versioner, transform TransformFunc, + cache etcdCache) *etcdWatcher { + w := &etcdWatcher{ + encoding: encoding, + versioner: versioner, + transform: transform, + list: list, + quorum: quorum, + include: include, + filter: filter, + // Buffer this channel, so that the etcd client is not forced + // to context switch with every object it gets, and so that a + // long time spent decoding an object won't block the *next* + // object. Basically, we see a lot of "401 window exceeded" + // errors from etcd, and that's due to the client not streaming + // results but rather getting them one at a time. So we really + // want to never block the etcd client, if possible. The 100 is + // mostly arbitrary--we know it goes as high as 50, though. + // There's a V(1) log message that prints the length so we can + // monitor how much of this buffer is actually used.
+ etcdIncoming: make(chan *etcd.Response, 100), + etcdError: make(chan error, 1), + // Similarly to etcdIncoming, we don't want to force a context + // switch on every new incoming object. + outgoing: make(chan watch.Event, 100), + userStop: make(chan struct{}), + stopped: false, + wg: sync.WaitGroup{}, + cache: cache, + ctx: nil, + cancel: nil, + } + w.emit = func(e watch.Event) { + if curLen := int64(len(w.outgoing)); w.outgoingHWM.Update(curLen) { + // Monitor if this gets backed up, and how much. + glog.V(1).Infof("watch (%v): %v objects queued in outgoing channel.", reflect.TypeOf(e.Object).String(), curLen) + } + // Give up on user stop, without this we leak a lot of goroutines in tests. + select { + case w.outgoing <- e: + case <-w.userStop: + } + } + // translate will call done. We need to Add() here because otherwise, + // if Stop() gets called before translate gets started, there'd be a + // problem. + w.wg.Add(1) + go w.translate() + return w +} + +// etcdWatch calls etcd's Watch function, and handles any errors. Meant to be called +// as a goroutine. +func (w *etcdWatcher) etcdWatch(ctx context.Context, client etcd.KeysAPI, key string, resourceVersion uint64) { + defer utilruntime.HandleCrash() + defer close(w.etcdError) + defer close(w.etcdIncoming) + + // All calls to etcd are coming from this function - once it is finished + // no other call to etcd should be generated by this watcher. + done := func() {} + + // We need to be prepared that Stop() can be called at any time. + // It can potentially also be called, even before this function is called. + // If that is the case, we simply skip all the code here. + // See #18928 for more details. + var watcher etcd.Watcher + returned := func() bool { + w.stopLock.Lock() + defer w.stopLock.Unlock() + if w.stopped { + // Watcher has already been stopped - don't even initiate it here.
+ return true + } + w.wg.Add(1) + done = w.wg.Done + // Perform initialization of watcher under lock - we want to avoid situation when + // Stop() is called in the meantime (which in tests can cause etcd termination and + // strange behavior here). + if resourceVersion == 0 { + latest, err := etcdGetInitialWatchState(ctx, client, key, w.list, w.quorum, w.etcdIncoming) + if err != nil { + w.etcdError <- err + return true + } + resourceVersion = latest + } + + opts := etcd.WatcherOptions{ + Recursive: w.list, + AfterIndex: resourceVersion, + } + watcher = client.Watcher(key, &opts) + w.ctx, w.cancel = context.WithCancel(ctx) + return false + }() + defer done() + if returned { + return + } + + for { + resp, err := watcher.Next(w.ctx) + if err != nil { + w.etcdError <- err + return + } + w.etcdIncoming <- resp + } +} + +// etcdGetInitialWatchState turns an etcd Get request into a watch equivalent +func etcdGetInitialWatchState(ctx context.Context, client etcd.KeysAPI, key string, recursive bool, quorum bool, incoming chan<- *etcd.Response) (resourceVersion uint64, err error) { + opts := etcd.GetOptions{ + Recursive: recursive, + Sort: false, + Quorum: quorum, + } + resp, err := client.Get(ctx, key, &opts) + if err != nil { + if !etcdutil.IsEtcdNotFound(err) { + utilruntime.HandleError(fmt.Errorf("watch was unable to retrieve the current index for the provided key (%q): %v", key, err)) + return resourceVersion, toStorageErr(err, key, 0) + } + if etcdError, ok := err.(etcd.Error); ok { + resourceVersion = etcdError.Index + } + return resourceVersion, nil + } + resourceVersion = resp.Index + convertRecursiveResponse(resp.Node, resp, incoming) + return +} + +// convertRecursiveResponse turns a recursive get response from etcd into individual response objects +// by copying the original response. This emulates the behavior of a recursive watch. 
+func convertRecursiveResponse(node *etcd.Node, response *etcd.Response, incoming chan<- *etcd.Response) { + if node.Dir { + for i := range node.Nodes { + convertRecursiveResponse(node.Nodes[i], response, incoming) + } + return + } + copied := *response + copied.Action = "get" + copied.Node = node + incoming <- &copied +} + +// translate pulls stuff from etcd, converts, and pushes out the outgoing channel. Meant to be +// called as a goroutine. +func (w *etcdWatcher) translate() { + defer w.wg.Done() + defer close(w.outgoing) + defer utilruntime.HandleCrash() + + for { + select { + case err := <-w.etcdError: + if err != nil { + var status *metav1.Status + switch { + case etcdutil.IsEtcdWatchExpired(err): + status = &metav1.Status{ + Status: metav1.StatusFailure, + Message: err.Error(), + Code: http.StatusGone, // Gone + Reason: metav1.StatusReasonExpired, + } + // TODO: need to generate errors using api/errors which has a circular dependency on this package + // no other way to inject errors + // case etcdutil.IsEtcdUnreachable(err): + // status = errors.NewServerTimeout(...) + default: + status = &metav1.Status{ + Status: metav1.StatusFailure, + Message: err.Error(), + Code: http.StatusInternalServerError, + Reason: metav1.StatusReasonInternalError, + } + } + w.emit(watch.Event{ + Type: watch.Error, + Object: status, + }) + } + return + case <-w.userStop: + return + case res, ok := <-w.etcdIncoming: + if ok { + if curLen := int64(len(w.etcdIncoming)); w.incomingHWM.Update(curLen) { + // Monitor if this gets backed up, and how much. + glog.V(1).Infof("watch: %v objects queued in incoming channel.", curLen) + } + w.sendResult(res) + } + // If !ok, don't return here-- must wait for etcdError channel + // to give an error or be closed. 
+ } + } +} + +func (w *etcdWatcher) decodeObject(node *etcd.Node) (runtime.Object, error) { + if obj, found := w.cache.getFromCache(node.ModifiedIndex, storage.SimpleFilter(storage.Everything)); found { + return obj, nil + } + + obj, err := runtime.Decode(w.encoding, []byte(node.Value)) + if err != nil { + return nil, err + } + + // ensure resource version is set on the object we load from etcd + if err := w.versioner.UpdateObject(obj, node.ModifiedIndex); err != nil { + utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", node.ModifiedIndex, obj, err)) + } + + // perform any necessary transformation + if w.transform != nil { + obj, err = w.transform(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("failure to transform api object %#v: %v", obj, err)) + return nil, err + } + } + + if node.ModifiedIndex != 0 { + w.cache.addToCache(node.ModifiedIndex, obj) + } + return obj, nil +} + +func (w *etcdWatcher) sendAdd(res *etcd.Response) { + if res.Node == nil { + utilruntime.HandleError(fmt.Errorf("unexpected nil node: %#v", res)) + return + } + if w.include != nil && !w.include(res.Node.Key) { + return + } + obj, err := w.decodeObject(res.Node) + if err != nil { + utilruntime.HandleError(fmt.Errorf("failure to decode api object: %v\n'%v' from %#v %#v", err, string(res.Node.Value), res, res.Node)) + // TODO: expose an error through watch.Interface? + // Ignore this value. If we stop the watch on a bad value, a client that uses + // the resourceVersion to resume will never be able to get past a bad value. 
+ return + } + if !w.filter(obj) { + return + } + action := watch.Added + w.emit(watch.Event{ + Type: action, + Object: obj, + }) +} + +func (w *etcdWatcher) sendModify(res *etcd.Response) { + if res.Node == nil { + glog.Errorf("unexpected nil node: %#v", res) + return + } + if w.include != nil && !w.include(res.Node.Key) { + return + } + curObj, err := w.decodeObject(res.Node) + if err != nil { + utilruntime.HandleError(fmt.Errorf("failure to decode api object: %v\n'%v' from %#v %#v", err, string(res.Node.Value), res, res.Node)) + // TODO: expose an error through watch.Interface? + // Ignore this value. If we stop the watch on a bad value, a client that uses + // the resourceVersion to resume will never be able to get past a bad value. + return + } + curObjPasses := w.filter(curObj) + oldObjPasses := false + var oldObj runtime.Object + if res.PrevNode != nil && res.PrevNode.Value != "" { + // Ignore problems reading the old object. + if oldObj, err = w.decodeObject(res.PrevNode); err == nil { + if err := w.versioner.UpdateObject(oldObj, res.Node.ModifiedIndex); err != nil { + utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", res.Node.ModifiedIndex, oldObj, err)) + } + oldObjPasses = w.filter(oldObj) + } + } + // Some changes to an object may cause it to start or stop matching a filter. + // We need to report those as adds/deletes. So we have to check both the previous + // and current value of the object. + switch { + case curObjPasses && oldObjPasses: + w.emit(watch.Event{ + Type: watch.Modified, + Object: curObj, + }) + case curObjPasses && !oldObjPasses: + w.emit(watch.Event{ + Type: watch.Added, + Object: curObj, + }) + case !curObjPasses && oldObjPasses: + w.emit(watch.Event{ + Type: watch.Deleted, + Object: oldObj, + }) + } + // Do nothing if neither new nor old object passed the filter. 
+} + +func (w *etcdWatcher) sendDelete(res *etcd.Response) { + if res.PrevNode == nil { + utilruntime.HandleError(fmt.Errorf("unexpected nil prev node: %#v", res)) + return + } + if w.include != nil && !w.include(res.PrevNode.Key) { + return + } + node := *res.PrevNode + if res.Node != nil { + // Note that this sends the *old* object with the etcd index for the time at + // which it gets deleted. This will allow users to restart the watch at the right + // index. + node.ModifiedIndex = res.Node.ModifiedIndex + } + obj, err := w.decodeObject(&node) + if err != nil { + utilruntime.HandleError(fmt.Errorf("failure to decode api object: %v\nfrom %#v %#v", err, res, res.Node)) + // TODO: expose an error through watch.Interface? + // Ignore this value. If we stop the watch on a bad value, a client that uses + // the resourceVersion to resume will never be able to get past a bad value. + return + } + if !w.filter(obj) { + return + } + w.emit(watch.Event{ + Type: watch.Deleted, + Object: obj, + }) +} + +func (w *etcdWatcher) sendResult(res *etcd.Response) { + switch res.Action { + case EtcdCreate, EtcdGet: + // "Get" will only happen in watch 0 case, where we explicitly want ADDED event + // for initial state. + w.sendAdd(res) + case EtcdSet, EtcdCAS: + w.sendModify(res) + case EtcdDelete, EtcdExpire, EtcdCAD: + w.sendDelete(res) + default: + utilruntime.HandleError(fmt.Errorf("unknown action: %v", res.Action)) + } +} + +// ResultChan implements watch.Interface. +func (w *etcdWatcher) ResultChan() <-chan watch.Event { + return w.outgoing +} + +// Stop implements watch.Interface. +func (w *etcdWatcher) Stop() { + w.stopLock.Lock() + if w.cancel != nil { + w.cancel() + w.cancel = nil + } + if !w.stopped { + w.stopped = true + close(w.userStop) + } + w.stopLock.Unlock() + + // Wait until all calls to etcd are finished and no other + // will be issued. 
+ w.wg.Wait() +} diff --git a/pkg/storage/etcd/etcd_watcher_test.go b/pkg/storage/etcd/etcd_watcher_test.go new file mode 100644 index 000000000..cfdb5a7ab --- /dev/null +++ b/pkg/storage/etcd/etcd_watcher_test.go @@ -0,0 +1,562 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package etcd + +import ( + rt "runtime" + "testing" + + apiequality "k8s.io/apimachinery/pkg/api/equality" + apitesting "k8s.io/apimachinery/pkg/api/testing" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/apiserver/pkg/apis/example" + examplev1 "k8s.io/apiserver/pkg/apis/example/v1" + "k8s.io/apiserver/pkg/storage" + "k8s.io/kubernetes/pkg/storage/etcd/etcdtest" + etcdtesting "k8s.io/kubernetes/pkg/storage/etcd/testing" + + etcd "github.com/coreos/etcd/client" + "golang.org/x/net/context" +) + +var versioner = APIObjectVersioner{} + +// Implements etcdCache interface as empty methods (i.e. 
does not cache any objects) +type fakeEtcdCache struct{} + +func (f *fakeEtcdCache) getFromCache(index uint64, filter storage.FilterFunc) (runtime.Object, bool) { + return nil, false +} + +func (f *fakeEtcdCache) addToCache(index uint64, obj runtime.Object) { +} + +var _ etcdCache = &fakeEtcdCache{} + +func TestWatchInterpretations(t *testing.T) { + _, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + // Declare some pods to make the test cases compact. + podFoo := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + podBar := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "bar"}} + podBaz := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "baz"}} + + // All of these test cases will be run with the firstLetterIsB Filter. + table := map[string]struct { + actions []string // Run this test item for every action here. + prevNodeValue string + nodeValue string + expectEmit bool + expectType watch.EventType + expectObject runtime.Object + }{ + "create": { + actions: []string{"create", "get"}, + nodeValue: runtime.EncodeOrDie(codec, podBar), + expectEmit: true, + expectType: watch.Added, + expectObject: podBar, + }, + "create but filter blocks": { + actions: []string{"create", "get"}, + nodeValue: runtime.EncodeOrDie(codec, podFoo), + expectEmit: false, + }, + "delete": { + actions: []string{"delete"}, + prevNodeValue: runtime.EncodeOrDie(codec, podBar), + expectEmit: true, + expectType: watch.Deleted, + expectObject: podBar, + }, + "delete but filter blocks": { + actions: []string{"delete"}, + nodeValue: runtime.EncodeOrDie(codec, podFoo), + expectEmit: false, + }, + "modify appears to create 1": { + actions: []string{"set", "compareAndSwap"}, + nodeValue: runtime.EncodeOrDie(codec, podBar), + expectEmit: true, + expectType: watch.Added, + expectObject: podBar, + }, + "modify appears to create 2": { + actions: []string{"set", "compareAndSwap"}, + prevNodeValue: runtime.EncodeOrDie(codec, podFoo), + nodeValue: 
runtime.EncodeOrDie(codec, podBar), + expectEmit: true, + expectType: watch.Added, + expectObject: podBar, + }, + "modify appears to delete": { + actions: []string{"set", "compareAndSwap"}, + prevNodeValue: runtime.EncodeOrDie(codec, podBar), + nodeValue: runtime.EncodeOrDie(codec, podFoo), + expectEmit: true, + expectType: watch.Deleted, + expectObject: podBar, // Should return last state that passed the filter! + }, + "modify modifies": { + actions: []string{"set", "compareAndSwap"}, + prevNodeValue: runtime.EncodeOrDie(codec, podBar), + nodeValue: runtime.EncodeOrDie(codec, podBaz), + expectEmit: true, + expectType: watch.Modified, + expectObject: podBaz, + }, + "modify ignores": { + actions: []string{"set", "compareAndSwap"}, + nodeValue: runtime.EncodeOrDie(codec, podFoo), + expectEmit: false, + }, + } + firstLetterIsB := func(obj runtime.Object) bool { + return obj.(*example.Pod).Name[0] == 'b' + } + for name, item := range table { + for _, action := range item.actions { + w := newEtcdWatcher(true, false, nil, firstLetterIsB, codec, versioner, nil, &fakeEtcdCache{}) + emitCalled := false + w.emit = func(event watch.Event) { + emitCalled = true + if !item.expectEmit { + return + } + if e, a := item.expectType, event.Type; e != a { + t.Errorf("'%v - %v': expected %v, got %v", name, action, e, a) + } + if e, a := item.expectObject, event.Object; !apiequality.Semantic.DeepDerivative(e, a) { + t.Errorf("'%v - %v': expected %v, got %v", name, action, e, a) + } + } + + var n, pn *etcd.Node + if item.nodeValue != "" { + n = &etcd.Node{Value: item.nodeValue} + } + if item.prevNodeValue != "" { + pn = &etcd.Node{Value: item.prevNodeValue} + } + + w.sendResult(&etcd.Response{ + Action: action, + Node: n, + PrevNode: pn, + }) + + if e, a := item.expectEmit, emitCalled; e != a { + t.Errorf("'%v - %v': expected %v, got %v", name, action, e, a) + } + w.Stop() + } + } +} + +func TestWatchInterpretation_ResponseNotSet(t *testing.T) { + _, codecs := testScheme(t) + codec := 
codecs.LegacyCodec(schema.GroupVersion{Version: "v1"}) + w := newEtcdWatcher(false, false, nil, storage.SimpleFilter(storage.Everything), codec, versioner, nil, &fakeEtcdCache{}) + w.emit = func(e watch.Event) { + t.Errorf("Unexpected emit: %v", e) + } + + w.sendResult(&etcd.Response{ + Action: "update", + }) + w.Stop() +} + +func TestWatchInterpretation_ResponseNoNode(t *testing.T) { + _, codecs := testScheme(t) + codec := codecs.LegacyCodec(schema.GroupVersion{Version: "v1"}) + actions := []string{"create", "set", "compareAndSwap", "delete"} + for _, action := range actions { + w := newEtcdWatcher(false, false, nil, storage.SimpleFilter(storage.Everything), codec, versioner, nil, &fakeEtcdCache{}) + w.emit = func(e watch.Event) { + t.Errorf("Unexpected emit: %v", e) + } + w.sendResult(&etcd.Response{ + Action: action, + }) + w.Stop() + } +} + +func TestWatchInterpretation_ResponseBadData(t *testing.T) { + _, codecs := testScheme(t) + codec := codecs.LegacyCodec(schema.GroupVersion{Version: "v1"}) + actions := []string{"create", "set", "compareAndSwap", "delete"} + for _, action := range actions { + w := newEtcdWatcher(false, false, nil, storage.SimpleFilter(storage.Everything), codec, versioner, nil, &fakeEtcdCache{}) + w.emit = func(e watch.Event) { + t.Errorf("Unexpected emit: %v", e) + } + w.sendResult(&etcd.Response{ + Action: action, + Node: &etcd.Node{ + Value: "foobar", + }, + }) + w.sendResult(&etcd.Response{ + Action: action, + PrevNode: &etcd.Node{ + Value: "foobar", + }, + }) + w.Stop() + } +} + +func TestSendResultDeleteEventHaveLatestIndex(t *testing.T) { + _, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + filter := func(obj runtime.Object) bool { + return obj.(*example.Pod).Name != "bar" + } + w := newEtcdWatcher(false, false, nil, filter, codec, versioner, nil, &fakeEtcdCache{}) + + eventChan := make(chan watch.Event, 1) + w.emit = func(e watch.Event) { + eventChan <- e + } + + fooPod := 
&example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + barPod := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "bar"}} + fooBytes, err := runtime.Encode(codec, fooPod) + if err != nil { + t.Fatalf("Encode failed: %v", err) + } + barBytes, err := runtime.Encode(codec, barPod) + if err != nil { + t.Fatalf("Encode failed: %v", err) + } + + tests := []struct { + response *etcd.Response + expRV string + }{{ // Delete event + response: &etcd.Response{ + Action: EtcdDelete, + Node: &etcd.Node{ + ModifiedIndex: 2, + }, + PrevNode: &etcd.Node{ + Value: string(fooBytes), + ModifiedIndex: 1, + }, + }, + expRV: "2", + }, { // Modify event with uninterested data + response: &etcd.Response{ + Action: EtcdSet, + Node: &etcd.Node{ + Value: string(barBytes), + ModifiedIndex: 2, + }, + PrevNode: &etcd.Node{ + Value: string(fooBytes), + ModifiedIndex: 1, + }, + }, + expRV: "2", + }} + + for i, tt := range tests { + w.sendResult(tt.response) + ev := <-eventChan + if ev.Type != watch.Deleted { + t.Errorf("#%d: event type want=Deleted, get=%s", i, ev.Type) + return + } + rv := ev.Object.(*example.Pod).ResourceVersion + if rv != tt.expRV { + t.Errorf("#%d: resource version want=%s, get=%s", i, tt.expRV, rv) + } + } + w.Stop() +} + +func TestWatch(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + key := "/some/key" + h := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + watching, err := h.Watch(context.TODO(), key, "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + // watching is explicitly closed below. 
+ + // Test normal case + pod := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + returnObj := &example.Pod{} + err = h.Create(context.TODO(), key, pod, returnObj, 0) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + event := <-watching.ResultChan() + if e, a := watch.Added, event.Type; e != a { + t.Errorf("Expected %v, got %v", e, a) + } + if e, a := pod, event.Object; !apiequality.Semantic.DeepDerivative(e, a) { + t.Errorf("Expected %v, got %v", e, a) + } + + watching.Stop() + + // There is a race in etcdWatcher so that after calling Stop() one of + // two things can happen: + // - ResultChan() may be closed (triggered by closing userStop channel) + // - an Error "context cancelled" may be emitted (triggered by cancelling request + // to etcd and putting that error to etcdError channel) + // We need to be prepared for both here. + event, open := <-watching.ResultChan() + if open && event.Type != watch.Error { + t.Errorf("Unexpected event from stopped watcher: %#v", event) + } +} + +func TestWatchEtcdState(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + key := "/somekey/foo" + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + + h := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + watching, err := h.Watch(context.TODO(), key, "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watching.Stop() + + pod := &example.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "foo"}, + } + + err = h.Create(context.TODO(), key, pod, pod, 0) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + event := <-watching.ResultChan() + if event.Type != watch.Added { + t.Errorf("Unexpected event %#v", event) + } + + pod.ResourceVersion = "" + pod.Status = example.PodStatus{ + Phase: example.PodPhase("Running"), + } + + // CAS the previous value + updateFn := func(input runtime.Object, res 
storage.ResponseMeta) (runtime.Object, *uint64, error) { + newObj, err := scheme.DeepCopy(pod) + if err != nil { + t.Errorf("unexpected error: %v", err) + return nil, nil, err + } + return newObj.(*example.Pod), nil, nil + } + err = h.GuaranteedUpdate(context.TODO(), key, &example.Pod{}, false, nil, updateFn) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + event = <-watching.ResultChan() + if event.Type != watch.Modified { + t.Errorf("Unexpected event %#v", event) + } + + if e, a := pod, event.Object; !apiequality.Semantic.DeepDerivative(e, a) { + t.Errorf("Unexpected error: expected %#v, got %#v", e, a) + } +} + +func TestWatchFromZeroIndex(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + pod := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + + key := "/somekey/foo" + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + + h := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + // set before the watch and verify events + err := h.Create(context.TODO(), key, pod, pod, 0) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + pod.ResourceVersion = "" + + watching, err := h.Watch(context.TODO(), key, "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // The create trigger ADDED event when watching from 0 + event := <-watching.ResultChan() + watching.Stop() + if event.Type != watch.Added { + t.Errorf("Unexpected event %#v", event) + } + + // check for concatenation on watch event with CAS + updateFn := func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + pod := input.(*example.Pod) + pod.Name = "bar" + return pod, nil, nil + } + err = h.GuaranteedUpdate(context.TODO(), key, &example.Pod{}, false, nil, updateFn) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + watching, err = h.Watch(context.TODO(), key, "0", 
storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watching.Stop() + + // because we watch from 0, first event that we receive will always be ADDED + event = <-watching.ResultChan() + if event.Type != watch.Added { + t.Errorf("Unexpected event %#v", event) + } + + pod.Name = "baz" + updateFn = func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + pod := input.(*example.Pod) + pod.Name = "baz" + return pod, nil, nil + } + err = h.GuaranteedUpdate(context.TODO(), key, &example.Pod{}, false, nil, updateFn) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + event = <-watching.ResultChan() + if event.Type != watch.Modified { + t.Errorf("Unexpected event %#v", event) + } + + if e, a := pod, event.Object; a == nil || !apiequality.Semantic.DeepDerivative(e, a) { + t.Errorf("Unexpected error: expected %#v, got %#v", e, a) + } +} + +func TestWatchListFromZeroIndex(t *testing.T) { + scheme, codecs := testScheme(t) + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + prefix := "/some/key" + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + h := newEtcdHelper(server.Client, scheme, codec, prefix) + + watching, err := h.WatchList(context.TODO(), "/", "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watching.Stop() + + // creates foo which should trigger the WatchList for "/" + pod := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + err = h.Create(context.TODO(), pod.Name, pod, pod, 0) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + event, _ := <-watching.ResultChan() + if event.Type != watch.Added { + t.Errorf("Unexpected event %#v", event) + } + + if e, a := pod, event.Object; !apiequality.Semantic.DeepDerivative(e, a) { + t.Errorf("Unexpected error: expected %v, got %v", e, a) + } +} + +func TestWatchListIgnoresRootKey(t *testing.T) { + scheme, codecs := testScheme(t) 
+ codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + pod := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + key := "/some/key" + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + h := newEtcdHelper(server.Client, scheme, codec, key) + + watching, err := h.WatchList(context.TODO(), key, "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watching.Stop() + + // creates key/foo which should trigger the WatchList for "key" + err = h.Create(context.TODO(), key, pod, pod, 0) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // force context switch to ensure watches would catch and notify. + rt.Gosched() + + select { + case event, _ := <-watching.ResultChan(): + t.Fatalf("Unexpected event: %#v", event) + default: + // fall through, expected behavior + } +} + +func TestWatchPurposefulShutdown(t *testing.T) { + scheme, codecs := testScheme(t) + codec := codecs.LegacyCodec(schema.GroupVersion{Version: "v1"}) + server := etcdtesting.NewEtcdTestClientServer(t) + defer server.Terminate(t) + key := "/some/key" + h := newEtcdHelper(server.Client, scheme, codec, etcdtest.PathPrefix()) + + // Test purposeful shutdown + watching, err := h.Watch(context.TODO(), key, "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + watching.Stop() + rt.Gosched() + + // There is a race in etcdWatcher so that after calling Stop() one of + // two things can happen: + // - ResultChan() may be closed (triggered by closing userStop channel) + // - an Error "context cancelled" may be emitted (triggered by cancelling request + // to etcd and putting that error to etcdError channel) + // We need to be prepared for both here. 
+ event, open := <-watching.ResultChan() + if open && event.Type != watch.Error { + t.Errorf("Unexpected event from stopped watcher: %#v", event) + } +} diff --git a/pkg/storage/etcd/etcdtest/doc.go b/pkg/storage/etcd/etcdtest/doc.go new file mode 100644 index 000000000..23f749e5e --- /dev/null +++ b/pkg/storage/etcd/etcdtest/doc.go @@ -0,0 +1,17 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package etcdtest // import "k8s.io/kubernetes/pkg/storage/etcd/etcdtest" diff --git a/pkg/storage/etcd/etcdtest/etcdtest.go b/pkg/storage/etcd/etcdtest/etcdtest.go new file mode 100644 index 000000000..a4a9ce243 --- /dev/null +++ b/pkg/storage/etcd/etcdtest/etcdtest.go @@ -0,0 +1,39 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package etcdtest + +import ( + "os" + "path" +) + +// Cache size to use for tests. 
+const DeserializationCacheSize = 150 + +// Returns the prefix set via the ETCD_PREFIX environment variable (if any). +func PathPrefix() string { + pref := os.Getenv("ETCD_PREFIX") + if pref == "" { + pref = "registry" + } + return path.Join("/", pref) +} + +// Adds the ETCD_PREFIX to the provided key +func AddPrefix(in string) string { + return path.Join(PathPrefix(), in) +} diff --git a/pkg/storage/etcd/testing/testingcert/certificates.go b/pkg/storage/etcd/testing/testingcert/certificates.go new file mode 100644 index 000000000..3eecfda60 --- /dev/null +++ b/pkg/storage/etcd/testing/testingcert/certificates.go @@ -0,0 +1,113 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testingcert + +// You can use cfssl tool to generate certificates, please refer +// https://github.com/coreos/etcd/tree/master/hack/tls-setup for more details. 
+// +// ca-config.json: +// expiry was changed from 1 year to 100 years (876000h) +// ca-csr.json: +// ca expiry was set to 100 years (876000h) ("ca":{"expiry":"876000h"}) +// key was changed from ecdsa,384 to rsa,2048 +// req-csr.json: +// key was changed from ecdsa,384 to rsa,2048 +// hosts were changed to "localhost","127.0.0.1" +const CAFileContent = ` +-----BEGIN CERTIFICATE----- +MIIEUDCCAzigAwIBAgIUKfV5+qwlw3JneAPdJS7JCO8xIlYwDQYJKoZIhvcNAQEL +BQAwgawxCzAJBgNVBAYTAlVTMSowKAYDVQQKEyFIb25lc3QgQWNobWVkJ3MgVXNl +ZCBDZXJ0aWZpY2F0ZXMxKTAnBgNVBAsTIEhhc3RpbHktR2VuZXJhdGVkIFZhbHVl +cyBEaXZpc29uMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQIEwpDYWxp +Zm9ybmlhMRkwFwYDVQQDExBBdXRvZ2VuZXJhdGVkIENBMCAXDTE2MDMxMjIzMTQw +MFoYDzIxMTYwMjE3MjMxNDAwWjCBrDELMAkGA1UEBhMCVVMxKjAoBgNVBAoTIUhv +bmVzdCBBY2htZWQncyBVc2VkIENlcnRpZmljYXRlczEpMCcGA1UECxMgSGFzdGls +eS1HZW5lcmF0ZWQgVmFsdWVzIERpdmlzb24xFjAUBgNVBAcTDVNhbiBGcmFuY2lz +Y28xEzARBgNVBAgTCkNhbGlmb3JuaWExGTAXBgNVBAMTEEF1dG9nZW5lcmF0ZWQg +Q0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP+acpr1USrObZFu+6 +v+Bk6rYw+sWynP373cNUUiHfnZ3D7f9yJsDscV0Mo4R8DddqkxawrA5fK2Fm2Z9G +vvY5par4/JbwRIEkXmeM4e52Mqv0Yuoz62O+0jQvRawnCCJMcKuo+ijHMjmm0AF1 +JdhTpTgvUwEP9WtY9JVTkfMCnDqZiqOU5D+d4YWUtkKqgQNvbZRs6wGubhMCZe8X +m+3bK8YAsWWtoFgr7plxXk4D8MLh+PqJ3oJjfxfW5A9dHbnSEmdZ3vrYwrKgyfNf +bvHE5qQmiSZUbUaCw3mKfaEMCNesPT46nBHxhAWc5aiL1tOXzvV5Uze7A7huPoI9 +a3etAgMBAAGjZjBkMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/AgEC +MB0GA1UdDgQWBBQYc0xXQ6VNjFvIOqWfXorxx9rKRzAfBgNVHSMEGDAWgBQYc0xX +Q6VNjFvIOqWfXorxx9rKRzANBgkqhkiG9w0BAQsFAAOCAQEAaKyHDWYVjEyEKTXJ +qS9r46ehL5FZlWD2ZytBP8aHE307l9AfQ+DFWldCNaqMXLZozsresVaSzSOI6UUD +lCIQLDpPyxbpR320u8mC08+lhhwR/YRkrEqKHk56Wl4OaqoyWmguqYU9p0DiQeTU +sZsxOwG7cyEEvvs+XmZ/vBLBOr59xyjwn4seQqzwZj3VYeiKLw40iQt1yT442rcP +CfdlE9wTEONvWT+kBGMt0JlalXH3jFvlfcGQdDfRmDeTJtA+uIbvJhwJuGCNHHAc +xqC+4mAGBPN/dMPXpjayHD5dOXIKLfrNpqse6jImYlY9zduvwIHRDK/zvqTyPlNZ +uR84Nw== +-----END CERTIFICATE----- +` +const CertFileContent = ` +-----BEGIN 
CERTIFICATE----- +MIIELzCCAxegAwIBAgIUcjkJA3cmHeoBQggaKZmfKebFL9cwDQYJKoZIhvcNAQEL +BQAwgawxCzAJBgNVBAYTAlVTMSowKAYDVQQKEyFIb25lc3QgQWNobWVkJ3MgVXNl +ZCBDZXJ0aWZpY2F0ZXMxKTAnBgNVBAsTIEhhc3RpbHktR2VuZXJhdGVkIFZhbHVl +cyBEaXZpc29uMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQIEwpDYWxp +Zm9ybmlhMRkwFwYDVQQDExBBdXRvZ2VuZXJhdGVkIENBMCAXDTE2MDMxMjIzMTQw +MFoYDzIxMTYwMjE3MjMxNDAwWjBVMRYwFAYDVQQKEw1hdXRvZ2VuZXJhdGVkMRUw +EwYDVQQLEwxldGNkIGNsdXN0ZXIxFTATBgNVBAcTDHRoZSBpbnRlcm5ldDENMAsG +A1UEAxMEZXRjZDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAOiW5A65 +hWGbnwceoZHM0+OexU4cPF/FpP+7BOK5i7ymSWAqfKfNuio2TB1lAErC1oX7bgTX +ieP10uz3FYWQNrlDn0I4KSA888rFPtx8GwoxH/52fGlE80BUV9PNeOVP+mYza0ih +oFj2+PhXVL/JZbx9P/2RLSNbEnq+OPk8AN82SkNtpFzanwtpb3f+kt73878KNoQu +xYZaCF1sK45Kn7mjKSDu/b3xUbTrNwnyVAGOdLzI7CCWOu+ECoZYAH4ZNHHakbyY +eWQ7U9leocEOPlqxsQAKodaCYjuAaOFIcz8/W81q+3qNw/6GbZ4znjRKQ3OtIPZ4 +JH1iNofCudWDp+0CAwEAAaOBnDCBmTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYw +FAYIKwYBBQUHAwEGCCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFMJE +43qLCWhyZAE/wxNneSJw7aUVMB8GA1UdIwQYMBaAFBhzTFdDpU2MW8g6pZ9eivHH +2spHMBoGA1UdEQQTMBGCCWxvY2FsaG9zdIcEfwAAATANBgkqhkiG9w0BAQsFAAOC +AQEAuELC8tbmpyKlA4HLSDHOUquypNyiE6ftBIifJtp8bvBd+jiv4Pr8oVGxHoqq +48X7lamvDirLV5gmK0CxO+EXkIUHhULzPyYPynqsR7KZlk1PWghqsF65nwqcjS3b +tykLttD1AUDIozYvujVYBKXGxb6jcGM1rBF1XtslciFZ5qQnj6dTUujo9/xBA2ql +kOKiVXBNU8KFzq4c20RzHFLfWkbc30Q4XG4dTDVBeGupnFQRkZ0y2dSSU82QcLA/ +HgAyQSO7+csN13r84zbmDuRpUgo6eTXzJ+77G19KDkEL7XEtlw2jB2L6/o+3RGtw +JLOpEsgi7hsvOYCuTA3Krw52Mw== +-----END CERTIFICATE----- +` +const KeyFileContent = ` +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEA6JbkDrmFYZufBx6hkczT457FThw8X8Wk/7sE4rmLvKZJYCp8 +p826KjZMHWUASsLWhftuBNeJ4/XS7PcVhZA2uUOfQjgpIDzzysU+3HwbCjEf/nZ8 +aUTzQFRX08145U/6ZjNrSKGgWPb4+FdUv8llvH0//ZEtI1sSer44+TwA3zZKQ22k +XNqfC2lvd/6S3vfzvwo2hC7FhloIXWwrjkqfuaMpIO79vfFRtOs3CfJUAY50vMjs +IJY674QKhlgAfhk0cdqRvJh5ZDtT2V6hwQ4+WrGxAAqh1oJiO4Bo4UhzPz9bzWr7 +eo3D/oZtnjOeNEpDc60g9ngkfWI2h8K51YOn7QIDAQABAoIBAQCj88Fc08++x0kp 
+ZqEzunPebsvcTLEOPa8aiUVfYLWszHbKsAhg7Pb+zHmI+upiyMcZeOvLw/eyVlVR +rrZgCRFaNN2texMaY3zigXnXSDBzVb+cyv7V4cGqpgmnBp7i3ia/Jh3I/A2gyK8l +t8HI03nAjXWvE0gDNS5okXBt16sxq6ZWyzHHVbN3UYtCDxnyh2Ibck4b+K8I8Bn1 +mwMsSqPXJS1UQ3U5UqcaMs7WOEGx+xmaPJTWm5Lb//BkakGuBTQj+7wotyXQYG5U +uZdPPcFRk6cqgjzUeKVUtGkdmfgHSTdIwZowkKibB4rdrudsRnSwfeB+83Jp9JwG +JPrGvsbNAoGBAPULIO+vVBZXVpUEAhvNSXtmOi/hAbQhOuix8iwHbJ5EbrWaDn4B +Reb2cw/fZGgGG4jtAOXdiY8R1XGGP8+RPZ5El10ZWnNrKQfpZ27gK/5yeq5dfGBG +4JLUpcrT180FJo00rgiQYJnHCk1fWrnzXNV6K08ZZHGr6yv4S/jbq/7vAoGBAPL9 +NTN/UWXWFlSHVcb2dFHcvIiPwRj9KwhuMu90b/CilBbSJ1av13xtf2ar5zkrEtWH +CB3q3wBaklQP9MfOqEWGZeOUcd9AbYWtxHjHmP5fJA9RjErjlTtqGkusNtZJbchU +UWfT/Tl9pREpCvJ/8iawc1hx7sHHKzYwnDnMaQbjAoGAfJdd9cBltr5NjZLuJ4in +dhCyQSncnePPegUQJwbXWVleGQPtnm+zRQ3Fzyo8eQ+x7Frk+/s6N/5PUlt6EmW8 +uL4TYAjGDq1LvXQVXTCp7cPzULjDxogDI2Tvr0MrFFksEtvYKQ6Pr2CeglybWrS8 +XOazIpK8mXdaKY8jwbKfrw0CgYAFnfrb3OaZzxAnFhXSiqH3vn2RPpl9JWUYRcvh +ozRvQKLhwCvuohP+KV3XlsO6m5dM3lk+r85F6NIXJWNINyvGp6u1ThovygJ+I502 +GY8c2kAwJndyx74MaJCBDVMbMwlZpzFWkBz7dj8ZnXRGVNTZNh0Ef2XAjwUdtJP3 +9hS7dwKBgQDCzq0RIxFyy3F5baGHWLVICxmhNExQ2+Vebh+DvsPKtnz6OrWdRbGX +wgGVLrn53s6eCblnXLtKr/Li+t7fS8IkQkvu5guOvI9VeVUmZhFET3GVmUxu+JTb +iQY4uBgaf8Fgay4dkOfjvlOpFDR4E7UbJpg8/cFKTrpwgOiUVyFVdQ== +-----END RSA PRIVATE KEY----- +` diff --git a/pkg/storage/etcd/testing/utils.go b/pkg/storage/etcd/testing/utils.go new file mode 100644 index 000000000..9e570243d --- /dev/null +++ b/pkg/storage/etcd/testing/utils.go @@ -0,0 +1,327 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testing + +import ( + "fmt" + "io/ioutil" + "net" + "net/http" + "net/http/httptest" + "os" + "path" + "testing" + "time" + + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apiserver/pkg/storage/storagebackend" + "k8s.io/kubernetes/pkg/storage/etcd/etcdtest" + "k8s.io/kubernetes/pkg/storage/etcd/testing/testingcert" + + etcd "github.com/coreos/etcd/client" + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/etcdserver" + "github.com/coreos/etcd/etcdserver/api/v2http" + "github.com/coreos/etcd/integration" + "github.com/coreos/etcd/pkg/testutil" + "github.com/coreos/etcd/pkg/transport" + "github.com/coreos/etcd/pkg/types" + "github.com/golang/glog" + "golang.org/x/net/context" +) + +// EtcdTestServer encapsulates the datastructures needed to start local instance for testing +type EtcdTestServer struct { + // The following are lumped etcd2 test server params + // TODO: Deprecate in a post 1.5 release + etcdserver.ServerConfig + PeerListeners, ClientListeners []net.Listener + Client etcd.Client + + CertificatesDir string + CertFile string + KeyFile string + CAFile string + + raftHandler http.Handler + s *etcdserver.EtcdServer + hss []*httptest.Server + + // The following are lumped etcd3 test server params + v3Cluster *integration.ClusterV3 + V3Client *clientv3.Client +} + +// newLocalListener opens a port localhost using any port +func newLocalListener(t *testing.T) net.Listener { + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + return l +} + +// newSecuredLocalListener opens a port localhost using any port +// with SSL enable +func newSecuredLocalListener(t *testing.T, certFile, keyFile, caFile string) net.Listener { + var l net.Listener + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + tlsInfo := transport.TLSInfo{ + CertFile: certFile, + 
KeyFile: keyFile, + CAFile: caFile, + } + tlscfg, err := tlsInfo.ServerConfig() + if err != nil { + t.Fatalf("unexpected serverConfig error: %v", err) + } + l, err = transport.NewKeepAliveListener(l, "https", tlscfg) + if err != nil { + t.Fatal(err) + } + return l +} + +func newHttpTransport(t *testing.T, certFile, keyFile, caFile string) etcd.CancelableTransport { + tlsInfo := transport.TLSInfo{ + CertFile: certFile, + KeyFile: keyFile, + CAFile: caFile, + } + tr, err := transport.NewTransport(tlsInfo, time.Second) + if err != nil { + t.Fatal(err) + } + return tr +} + +// configureTestCluster will set the params to start an etcd server +func configureTestCluster(t *testing.T, name string, https bool) *EtcdTestServer { + var err error + m := &EtcdTestServer{} + + pln := newLocalListener(t) + m.PeerListeners = []net.Listener{pln} + m.PeerURLs, err = types.NewURLs([]string{"http://" + pln.Addr().String()}) + if err != nil { + t.Fatal(err) + } + + // Allow test launches to control where etcd data goes, for space or performance reasons + baseDir := os.Getenv("TEST_ETCD_DIR") + if len(baseDir) == 0 { + baseDir = os.TempDir() + } + + if https { + m.CertificatesDir, err = ioutil.TempDir(baseDir, "etcd_certificates") + if err != nil { + t.Fatal(err) + } + m.CertFile = path.Join(m.CertificatesDir, "etcdcert.pem") + if err = ioutil.WriteFile(m.CertFile, []byte(testingcert.CertFileContent), 0644); err != nil { + t.Fatal(err) + } + m.KeyFile = path.Join(m.CertificatesDir, "etcdkey.pem") + if err = ioutil.WriteFile(m.KeyFile, []byte(testingcert.KeyFileContent), 0644); err != nil { + t.Fatal(err) + } + m.CAFile = path.Join(m.CertificatesDir, "ca.pem") + if err = ioutil.WriteFile(m.CAFile, []byte(testingcert.CAFileContent), 0644); err != nil { + t.Fatal(err) + } + + cln := newSecuredLocalListener(t, m.CertFile, m.KeyFile, m.CAFile) + m.ClientListeners = []net.Listener{cln} + m.ClientURLs, err = types.NewURLs([]string{"https://" + cln.Addr().String()}) + if err != nil { + 
t.Fatal(err) + } + } else { + cln := newLocalListener(t) + m.ClientListeners = []net.Listener{cln} + m.ClientURLs, err = types.NewURLs([]string{"http://" + cln.Addr().String()}) + if err != nil { + t.Fatal(err) + } + } + + m.Name = name + m.DataDir, err = ioutil.TempDir(baseDir, "etcd") + if err != nil { + t.Fatal(err) + } + + clusterStr := fmt.Sprintf("%s=http://%s", name, pln.Addr().String()) + m.InitialPeerURLsMap, err = types.NewURLsMap(clusterStr) + if err != nil { + t.Fatal(err) + } + m.InitialClusterToken = "TestEtcd" + m.NewCluster = true + m.ForceNewCluster = false + m.ElectionTicks = 10 + m.TickMs = uint(10) + + return m +} + +// launch will attempt to start the etcd server +func (m *EtcdTestServer) launch(t *testing.T) error { + var err error + if m.s, err = etcdserver.NewServer(&m.ServerConfig); err != nil { + return fmt.Errorf("failed to initialize the etcd server: %v", err) + } + m.s.SyncTicker = time.Tick(500 * time.Millisecond) + m.s.Start() + m.raftHandler = &testutil.PauseableHandler{Next: v2http.NewPeerHandler(m.s)} + for _, ln := range m.PeerListeners { + hs := &httptest.Server{ + Listener: ln, + Config: &http.Server{Handler: m.raftHandler}, + } + hs.Start() + m.hss = append(m.hss, hs) + } + for _, ln := range m.ClientListeners { + hs := &httptest.Server{ + Listener: ln, + Config: &http.Server{Handler: v2http.NewClientHandler(m.s, m.ServerConfig.ReqTimeout())}, + } + hs.Start() + m.hss = append(m.hss, hs) + } + return nil +} + +// waitForEtcd wait until etcd is propagated correctly +func (m *EtcdTestServer) waitUntilUp() error { + membersAPI := etcd.NewMembersAPI(m.Client) + for start := time.Now(); time.Since(start) < wait.ForeverTestTimeout; time.Sleep(10 * time.Millisecond) { + members, err := membersAPI.List(context.TODO()) + if err != nil { + glog.Errorf("Error when getting etcd cluster members") + continue + } + if len(members) == 1 && len(members[0].ClientURLs) > 0 { + return nil + } + } + return fmt.Errorf("timeout on waiting for etcd 
cluster") +} + +// Terminate will shutdown the running etcd server +func (m *EtcdTestServer) Terminate(t *testing.T) { + if m.v3Cluster != nil { + m.v3Cluster.Terminate(t) + } else { + m.Client = nil + m.s.Stop() + // TODO: This is a pretty ugly hack to workaround races during closing + // in-memory etcd server in unit tests - see #18928 for more details. + // We should get rid of it as soon as we have a proper fix - etcd clients + // have overwritten transport counting opened connections (probably by + // overwriting Dial function) and termination function waiting for all + // connections to be closed and stopping accepting new ones. + time.Sleep(250 * time.Millisecond) + for _, hs := range m.hss { + hs.CloseClientConnections() + hs.Close() + } + if err := os.RemoveAll(m.ServerConfig.DataDir); err != nil { + t.Fatal(err) + } + if len(m.CertificatesDir) > 0 { + if err := os.RemoveAll(m.CertificatesDir); err != nil { + t.Fatal(err) + } + } + } +} + +// NewEtcdTestClientServer DEPRECATED creates a new client and server for testing +func NewEtcdTestClientServer(t *testing.T) *EtcdTestServer { + server := configureTestCluster(t, "foo", true) + err := server.launch(t) + if err != nil { + t.Fatalf("Failed to start etcd server error=%v", err) + return nil + } + + cfg := etcd.Config{ + Endpoints: server.ClientURLs.StringSlice(), + Transport: newHttpTransport(t, server.CertFile, server.KeyFile, server.CAFile), + } + server.Client, err = etcd.New(cfg) + if err != nil { + server.Terminate(t) + t.Fatalf("Unexpected error in NewEtcdTestClientServer (%v)", err) + return nil + } + if err := server.waitUntilUp(); err != nil { + server.Terminate(t) + t.Fatalf("Unexpected error in waitUntilUp (%v)", err) + return nil + } + return server +} + +// NewUnsecuredEtcdTestClientServer DEPRECATED creates a new client and server for testing +func NewUnsecuredEtcdTestClientServer(t *testing.T) *EtcdTestServer { + server := configureTestCluster(t, "foo", false) + err := server.launch(t) + if 
err != nil { + t.Fatalf("Failed to start etcd server error=%v", err) + return nil + } + cfg := etcd.Config{ + Endpoints: server.ClientURLs.StringSlice(), + Transport: newHttpTransport(t, server.CertFile, server.KeyFile, server.CAFile), + } + server.Client, err = etcd.New(cfg) + if err != nil { + t.Errorf("Unexpected error in NewUnsecuredEtcdTestClientServer (%v)", err) + server.Terminate(t) + return nil + } + if err := server.waitUntilUp(); err != nil { + t.Errorf("Unexpected error in waitUntilUp (%v)", err) + server.Terminate(t) + return nil + } + return server +} + +// NewEtcd3TestClientServer creates a new client and server for testing +func NewUnsecuredEtcd3TestClientServer(t *testing.T, scheme *runtime.Scheme) (*EtcdTestServer, *storagebackend.Config) { + server := &EtcdTestServer{ + v3Cluster: integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}), + } + server.V3Client = server.v3Cluster.RandClient() + config := &storagebackend.Config{ + Type: "etcd3", + Prefix: etcdtest.PathPrefix(), + ServerList: server.V3Client.Endpoints(), + DeserializationCacheSize: etcdtest.DeserializationCacheSize, + Copier: scheme, + } + return server, config +} diff --git a/pkg/storage/etcd/util/doc.go b/pkg/storage/etcd/util/doc.go new file mode 100644 index 000000000..d10b9a992 --- /dev/null +++ b/pkg/storage/etcd/util/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
// Package util holds generic etcd-related utility functions that any user of etcd might want to
// use, without pulling in kubernetes-specific code.
package util // import "k8s.io/kubernetes/pkg/storage/etcd/util"
// GetEtcdVersion performs a version check against the provided Etcd server,
// returning the string response, and error (if any).
//
// host is used as a URL prefix: the request is sent to host + "/version".
// Any status other than 200 OK is returned as an error that names the host
// and the HTTP status that was received.
func GetEtcdVersion(host string) (string, error) {
	response, err := http.Get(host + "/version")
	if err != nil {
		return "", err
	}
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		// err is necessarily nil on this path, so report the HTTP status
		// rather than formatting a nil error into the message.
		return "", fmt.Errorf("unsuccessful response from etcd server %q: %s", host, response.Status)
	}
	versionBytes, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(versionBytes), nil
}

// etcdHealth is the JSON shape decoded by EtcdHealthCheck.
type etcdHealth struct {
	// Note this has to be public so the json library can modify it.
	Health string `json:"health"`
}

// EtcdHealthCheck decodes data as JSON and returns nil only when its "health"
// field is the string "true". It returns the decoding error for malformed
// input, and an error quoting the reported value for any other health value
// (including a missing field, which decodes to the empty string).
func EtcdHealthCheck(data []byte) error {
	obj := etcdHealth{}
	if err := json.Unmarshal(data, &obj); err != nil {
		return err
	}
	if obj.Health != "true" {
		return fmt.Errorf("Unhealthy status: %s", obj.Health)
	}
	return nil
}
+*/ + +package util + +import ( + "fmt" + "math/rand" + "net" + "net/http" + "net/http/httptest" + "strconv" + "testing" + "time" + + etcd "github.com/coreos/etcd/client" + "github.com/stretchr/testify/assert" +) + +const validEtcdVersion = "etcd 2.0.9" + +func TestIsEtcdNotFound(t *testing.T) { + try := func(err error, isNotFound bool) { + if IsEtcdNotFound(err) != isNotFound { + t.Errorf("Expected %#v to return %v, but it did not", err, isNotFound) + } + } + try(&etcd.Error{Code: 101}, false) + try(nil, false) + try(fmt.Errorf("some other kind of error"), false) +} + +func TestGetEtcdVersion_ValidVersion(t *testing.T) { + testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, validEtcdVersion) + })) + defer testServer.Close() + + var version string + var err error + if version, err = GetEtcdVersion(testServer.URL); err != nil { + t.Errorf("Unexpected error: %v", err) + } + assert.Equal(t, validEtcdVersion, version, "Unexpected version") + assert.Nil(t, err) +} + +func TestGetEtcdVersion_ErrorStatus(t *testing.T) { + testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + defer testServer.Close() + + _, err := GetEtcdVersion(testServer.URL) + assert.NotNil(t, err) +} + +func TestGetEtcdVersion_NotListening(t *testing.T) { + portIsOpen := func(port int) bool { + conn, err := net.DialTimeout("tcp", "127.0.0.1:"+strconv.Itoa(port), 1*time.Second) + if err == nil { + conn.Close() + return true + } + return false + } + + port := rand.Intn((1 << 16) - 1) + for tried := 0; portIsOpen(port); tried++ { + if tried >= 10 { + t.Fatal("Couldn't find a closed TCP port to continue testing") + } + port++ + } + + _, err := GetEtcdVersion("http://127.0.0.1:" + strconv.Itoa(port)) + assert.NotNil(t, err) +} + +func TestEtcdHealthCheck(t *testing.T) { + tests := []struct { + data string + expectErr bool + }{ + { + data: 
"{\"health\": \"true\"}", + expectErr: false, + }, + { + data: "{\"health\": \"false\"}", + expectErr: true, + }, + { + data: "invalid json", + expectErr: true, + }, + } + for _, test := range tests { + err := EtcdHealthCheck([]byte(test.data)) + if err != nil && !test.expectErr { + t.Errorf("unexpected error: %v", err) + } + if err == nil && test.expectErr { + t.Error("unexpected non-error") + } + } +} diff --git a/pkg/storage/etcd3/OWNERS b/pkg/storage/etcd3/OWNERS new file mode 100755 index 000000000..84c24e70c --- /dev/null +++ b/pkg/storage/etcd3/OWNERS @@ -0,0 +1,5 @@ +reviewers: +- wojtek-t +- timothysc +- madhusudancs +- hongchaodeng diff --git a/pkg/storage/etcd3/compact.go b/pkg/storage/etcd3/compact.go new file mode 100644 index 000000000..b9f626d3b --- /dev/null +++ b/pkg/storage/etcd3/compact.go @@ -0,0 +1,161 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package etcd3 + +import ( + "strconv" + "sync" + "time" + + "github.com/coreos/etcd/clientv3" + "github.com/golang/glog" + "golang.org/x/net/context" +) + +const ( + compactInterval = 5 * time.Minute + compactRevKey = "compact_rev_key" +) + +var ( + endpointsMapMu sync.Mutex + endpointsMap map[string]struct{} +) + +func init() { + endpointsMap = make(map[string]struct{}) +} + +// StartCompactor starts a compactor in the background to compact old version of keys that's not needed. 
// compactor periodically compacts historical versions of keys in etcd.
// It will compact keys with versions older than given interval.
// In other words, after compaction, it will only contain keys set during last interval.
// Any API call for the older versions of keys will return error.
// Interval is the time interval between each compaction. The first compaction happens after "interval".
//
// The loop runs until ctx is done. A failed compaction round is logged and the
// loop simply waits for the next interval.
func compactor(ctx context.Context, client *clientv3.Client, interval time.Duration) {
	// Technical definitions:
	// We have a special key in etcd defined as *compactRevKey*.
	// compactRevKey's value will be set to the string of last compacted revision.
	// compactRevKey's version will be used as logical time for comparison. The version is referred as compact time.
	// Initially, because the key doesn't exist, the compact time (version) is 0.
	//
	// Algorithm:
	// - Compare to see if (local compact_time) = (remote compact_time).
	// - If yes, increment both local and remote compact_time, and do a compaction.
	// - If not, set local to remote compact_time.
	//
	// Technical details/insights:
	//
	// The protocol here is lease based. If one compactor CAS successfully, the others would know it when they fail in
	// CAS later and would try again in 10 minutes. If an APIServer crashed, another one would "take over" the lease.
	//
	// For example, in the following diagram, we have a compactor C1 doing compaction in t1, t2. Another compactor C2
	// at t1' (t1 < t1' < t2) would CAS fail, set its known oldRev to rev at t1', and try again in t2' (t2' > t2).
	// If C1 crashed and wouldn't compact at t2, C2 would CAS successfully at t2'.
	//
	//               oldRev(t2)     curRev(t2)
	//                                  +
	//   oldRev        curRev           |
	//     +             +              |
	//     |             |              |
	//     |             |    t1'       |     t2'
	// +---v-------------v----^---------v------^---->
	//     t0           t1             t2
	//
	// We have the guarantees:
	// - in normal cases, the interval is 10 minutes.
	// - in failover, the interval is >10m and <20m
	//
	// NOTE(review): the 10/20 minute figures above are written for one specific interval value; the
	// actual period is whatever "interval" is, and StartCompactor passes compactInterval (5 minutes
	// in this file). Confirm which figure is intended.
	//
	// FAQ:
	// - What if time is not accurate? We don't care as long as someone did the compaction. Atomicity is ensured using
	//   etcd API.
	// - What happened under heavy load scenarios? Initially, each apiserver will do only one compaction
	//   every 10 minutes. This is very unlikely affecting or affected w.r.t. server load.

	// compactTime and rev start at zero and are carried from one round to the
	// next; each round replaces them with the values compact returns.
	var compactTime int64
	var rev int64
	var err error
	for {
		// Wait one interval, or stop as soon as the context is cancelled.
		select {
		case <-time.After(interval):
		case <-ctx.Done():
			return
		}

		compactTime, rev, err = compact(ctx, client, compactTime, rev)
		if err != nil {
			glog.Errorf("etcd: endpoint (%v) compact failed: %v", client.Endpoints(), err)
			continue
		}
	}
}
+func compact(ctx context.Context, client *clientv3.Client, t, rev int64) (int64, int64, error) { + resp, err := client.KV.Txn(ctx).If( + clientv3.Compare(clientv3.Version(compactRevKey), "=", t), + ).Then( + clientv3.OpPut(compactRevKey, strconv.FormatInt(rev, 10)), // Expect side effect: increment Version + ).Else( + clientv3.OpGet(compactRevKey), + ).Commit() + if err != nil { + return t, rev, err + } + + curRev := resp.Header.Revision + + if !resp.Succeeded { + curTime := resp.Responses[0].GetResponseRange().Kvs[0].Version + return curTime, curRev, nil + } + curTime := t + 1 + + if rev == 0 { + // We don't compact on bootstrap. + return curTime, curRev, nil + } + if _, err = client.Compact(ctx, rev); err != nil { + return curTime, curRev, err + } + glog.Infof("etcd: compacted rev (%d), endpoints (%v)", rev, client.Endpoints()) + return curTime, curRev, nil +} diff --git a/pkg/storage/etcd3/compact_test.go b/pkg/storage/etcd3/compact_test.go new file mode 100644 index 000000000..021b8c451 --- /dev/null +++ b/pkg/storage/etcd3/compact_test.go @@ -0,0 +1,87 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
// TestCompactConflict tests that two compactors (Let's use C1, C2) are trying to compact etcd cluster with the same
// logical time.
// - C1 compacts first. It will succeed.
// - C2 compacts after. It will fail. But it will get latest logical time, which should be larger by one.
//
// Both "compactors" are simulated by calling compact twice on the same client
// with logical time 0.
func TestCompactConflict(t *testing.T) {
	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer cluster.Terminate(t)
	client := cluster.RandClient()
	ctx := context.Background()

	// Write one key so there is a revision to pass to compact.
	putResp, err := client.Put(ctx, "/somekey", "data")
	if err != nil {
		t.Fatalf("Put failed: %v", err)
	}

	// Compact first. It would do the compaction and return compact time which is incremented by 1.
	curTime, _, err := compact(ctx, client, 0, putResp.Header.Revision)
	if err != nil {
		t.Fatalf("compact failed: %v", err)
	}
	if curTime != 1 {
		t.Errorf("Expect current logical time = 1, get = %v", curTime)
	}

	// Compact again with the same parameters. It won't do compaction but return the latest compact time.
	curTime2, _, err := compact(ctx, client, 0, putResp.Header.Revision)
	if err != nil {
		t.Fatalf("compact failed: %v", err)
	}
	if curTime != curTime2 {
		t.Errorf("Unexpected curTime (%v) != curTime2 (%v)", curTime, curTime2)
	}
}
// store is the etcd3-backed value returned by New and NewWithNoQuorumRead as
// a storage.Interface.
type store struct {
	client *clientv3.Client
	// getOps contains additional options that should be passed
	// to all Get() calls.
	getOps []clientv3.OpOption
	// codec encodes objects before they are written and decodes stored values.
	codec     runtime.Codec
	versioner storage.Versioner
	// pathPrefix is joined in front of every key handed to the store's methods.
	pathPrefix string
	watcher    *watcher
}
+func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error { + if version, err := s.versioner.ObjectResourceVersion(obj); err == nil && version != 0 { + return errors.New("resourceVersion should not be set on objects to be created") + } + data, err := runtime.Encode(s.codec, obj) + if err != nil { + return err + } + key = path.Join(s.pathPrefix, key) + + opts, err := s.ttlOpts(ctx, int64(ttl)) + if err != nil { + return err + } + + txnResp, err := s.client.KV.Txn(ctx).If( + notFound(key), + ).Then( + clientv3.OpPut(key, string(data), opts...), + ).Commit() + if err != nil { + return err + } + if !txnResp.Succeeded { + return storage.NewKeyExistsError(key, 0) + } + + if out != nil { + putResp := txnResp.Responses[0].GetResponsePut() + return decode(s.codec, s.versioner, data, out, putResp.Header.Revision) + } + return nil +} + +// Delete implements storage.Interface.Delete. +func (s *store) Delete(ctx context.Context, key string, out runtime.Object, precondtions *storage.Preconditions) error { + v, err := conversion.EnforcePtr(out) + if err != nil { + panic("unable to convert output object to pointer") + } + key = path.Join(s.pathPrefix, key) + if precondtions == nil { + return s.unconditionalDelete(ctx, key, out) + } + return s.conditionalDelete(ctx, key, out, v, precondtions) +} + +func (s *store) unconditionalDelete(ctx context.Context, key string, out runtime.Object) error { + // We need to do get and delete in single transaction in order to + // know the value and revision before deleting it. 
+ txnResp, err := s.client.KV.Txn(ctx).If().Then( + clientv3.OpGet(key), + clientv3.OpDelete(key), + ).Commit() + if err != nil { + return err + } + getResp := txnResp.Responses[0].GetResponseRange() + if len(getResp.Kvs) == 0 { + return storage.NewKeyNotFoundError(key, 0) + } + + kv := getResp.Kvs[0] + return decode(s.codec, s.versioner, kv.Value, out, kv.ModRevision) +} + +func (s *store) conditionalDelete(ctx context.Context, key string, out runtime.Object, v reflect.Value, precondtions *storage.Preconditions) error { + getResp, err := s.client.KV.Get(ctx, key) + if err != nil { + return err + } + for { + origState, err := s.getState(getResp, key, v, false) + if err != nil { + return err + } + if err := checkPreconditions(key, precondtions, origState.obj); err != nil { + return err + } + txnResp, err := s.client.KV.Txn(ctx).If( + clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev), + ).Then( + clientv3.OpDelete(key), + ).Else( + clientv3.OpGet(key), + ).Commit() + if err != nil { + return err + } + if !txnResp.Succeeded { + getResp = (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange()) + glog.V(4).Infof("deletion of %s failed because of a conflict, going to retry", key) + continue + } + return decode(s.codec, s.versioner, origState.data, out, origState.rev) + } +} + +// GuaranteedUpdate implements storage.Interface.GuaranteedUpdate. 
// GuaranteedUpdate implements storage.Interface.GuaranteedUpdate.
//
// It loads the current state of key, lets tryUpdate compute a new object and
// writes it back in a transaction guarded by the key's mod revision. When the
// guard fails, the value returned by the same transaction becomes the new
// starting state and the loop runs again.
//
// When exactly one non-nil suggestion is passed, the initial state is derived
// from that object instead of being read from etcd. If the encoded result is
// byte-identical to the starting data, nothing is written and the starting
// state is decoded into out.
//
// The function panics when conversion.EnforcePtr rejects out.
func (s *store) GuaranteedUpdate(
	ctx context.Context, key string, out runtime.Object, ignoreNotFound bool,
	precondtions *storage.Preconditions, tryUpdate storage.UpdateFunc, suggestion ...runtime.Object) error {
	trace := utiltrace.New(fmt.Sprintf("GuaranteedUpdate etcd3: %s", reflect.TypeOf(out).String()))
	defer trace.LogIfLong(500 * time.Millisecond)

	v, err := conversion.EnforcePtr(out)
	if err != nil {
		panic("unable to convert output object to pointer")
	}
	key = path.Join(s.pathPrefix, key)

	var origState *objState
	if len(suggestion) == 1 && suggestion[0] != nil {
		// Start from the caller-supplied object and skip the initial read.
		origState, err = s.getStateFromObject(suggestion[0])
		if err != nil {
			return err
		}
	} else {
		// err is shadowed inside this branch; origState is the outer variable.
		getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
		if err != nil {
			return err
		}
		origState, err = s.getState(getResp, key, v, ignoreNotFound)
		if err != nil {
			return err
		}
	}
	trace.Step("initial value restored")

	for {
		if err := checkPreconditions(key, precondtions, origState.obj); err != nil {
			return err
		}

		ret, ttl, err := s.updateState(origState, tryUpdate)
		if err != nil {
			return err
		}

		data, err := runtime.Encode(s.codec, ret)
		if err != nil {
			return err
		}
		// Nothing changed: skip the write and return the starting state.
		if bytes.Equal(data, origState.data) {
			return decode(s.codec, s.versioner, origState.data, out, origState.rev)
		}

		opts, err := s.ttlOpts(ctx, int64(ttl))
		if err != nil {
			return err
		}
		trace.Step("Transaction prepared")

		// Write only if the key still has the mod revision we started from;
		// otherwise fetch the current value in the same transaction.
		txnResp, err := s.client.KV.Txn(ctx).If(
			clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev),
		).Then(
			clientv3.OpPut(key, string(data), opts...),
		).Else(
			clientv3.OpGet(key),
		).Commit()
		if err != nil {
			return err
		}
		trace.Step("Transaction committed")
		if !txnResp.Succeeded {
			// Conflict: rebuild the starting state from the Else-branch read and retry.
			getResp := (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
			glog.V(4).Infof("GuaranteedUpdate of %s failed because of a conflict, going to retry", key)
			origState, err = s.getState(getResp, key, v, ignoreNotFound)
			if err != nil {
				return err
			}
			trace.Step("Retry value restored")
			continue
		}
		putResp := txnResp.Responses[0].GetResponsePut()
		return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
	}
}
+ if !strings.HasSuffix(key, "/") { + key += "/" + } + getResp, err := s.client.KV.Get(ctx, key, clientv3.WithPrefix()) + if err != nil { + return err + } + + elems := make([]*elemForDecode, len(getResp.Kvs)) + for i, kv := range getResp.Kvs { + elems[i] = &elemForDecode{ + data: kv.Value, + rev: uint64(kv.ModRevision), + } + } + if err := decodeList(elems, storage.SimpleFilter(pred), listPtr, s.codec, s.versioner); err != nil { + return err + } + // update version with cluster level revision + return s.versioner.UpdateList(listObj, uint64(getResp.Header.Revision)) +} + +// Watch implements storage.Interface.Watch. +func (s *store) Watch(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) { + return s.watch(ctx, key, resourceVersion, pred, false) +} + +// WatchList implements storage.Interface.WatchList. +func (s *store) WatchList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) { + return s.watch(ctx, key, resourceVersion, pred, true) +} + +func (s *store) watch(ctx context.Context, key string, rv string, pred storage.SelectionPredicate, recursive bool) (watch.Interface, error) { + rev, err := storage.ParseWatchResourceVersion(rv) + if err != nil { + return nil, err + } + key = path.Join(s.pathPrefix, key) + return s.watcher.Watch(ctx, key, int64(rev), recursive, pred) +} + +func (s *store) getState(getResp *clientv3.GetResponse, key string, v reflect.Value, ignoreNotFound bool) (*objState, error) { + state := &objState{ + obj: reflect.New(v.Type()).Interface().(runtime.Object), + meta: &storage.ResponseMeta{}, + } + if len(getResp.Kvs) == 0 { + if !ignoreNotFound { + return nil, storage.NewKeyNotFoundError(key, 0) + } + if err := runtime.SetZeroValue(state.obj); err != nil { + return nil, err + } + } else { + state.rev = getResp.Kvs[0].ModRevision + state.meta.ResourceVersion = uint64(state.rev) + state.data = getResp.Kvs[0].Value + if 
err := decode(s.codec, s.versioner, state.data, state.obj, state.rev); err != nil { + return nil, err + } + } + return state, nil +} + +func (s *store) getStateFromObject(obj runtime.Object) (*objState, error) { + state := &objState{ + obj: obj, + meta: &storage.ResponseMeta{}, + } + + rv, err := s.versioner.ObjectResourceVersion(obj) + if err != nil { + return nil, fmt.Errorf("couldn't get resource version: %v", err) + } + state.rev = int64(rv) + state.meta.ResourceVersion = uint64(state.rev) + + // Compute the serialized form - for that we need to temporarily clean + // its resource version field (those are not stored in etcd). + if err := s.versioner.UpdateObject(obj, 0); err != nil { + return nil, errors.New("resourceVersion cannot be set on objects store in etcd") + } + state.data, err = runtime.Encode(s.codec, obj) + if err != nil { + return nil, err + } + s.versioner.UpdateObject(state.obj, uint64(rv)) + return state, nil +} + +func (s *store) updateState(st *objState, userUpdate storage.UpdateFunc) (runtime.Object, uint64, error) { + ret, ttlPtr, err := userUpdate(st.obj, *st.meta) + if err != nil { + return nil, 0, err + } + + version, err := s.versioner.ObjectResourceVersion(ret) + if err != nil { + return nil, 0, err + } + if version != 0 { + // We cannot store object with resourceVersion in etcd. We need to reset it. + if err := s.versioner.UpdateObject(ret, 0); err != nil { + return nil, 0, fmt.Errorf("UpdateObject failed: %v", err) + } + } + var ttl uint64 + if ttlPtr != nil { + ttl = *ttlPtr + } + return ret, ttl, nil +} + +// ttlOpts returns client options based on given ttl. +// ttl: if ttl is non-zero, it will attach the key to a lease with ttl of roughly the same length +func (s *store) ttlOpts(ctx context.Context, ttl int64) ([]clientv3.OpOption, error) { + if ttl == 0 { + return nil, nil + } + // TODO: one lease per ttl key is expensive. Based on current use case, we can have a long window to + // put keys within into same lease. 
We shall benchmark this and optimize the performance. + lcr, err := s.client.Lease.Grant(ctx, ttl) + if err != nil { + return nil, err + } + return []clientv3.OpOption{clientv3.WithLease(clientv3.LeaseID(lcr.ID))}, nil +} + +// decode decodes value of bytes into object. It will also set the object resource version to rev. +// On success, objPtr would be set to the object. +func decode(codec runtime.Codec, versioner storage.Versioner, value []byte, objPtr runtime.Object, rev int64) error { + if _, err := conversion.EnforcePtr(objPtr); err != nil { + panic("unable to convert output object to pointer") + } + _, _, err := codec.Decode(value, nil, objPtr) + if err != nil { + return err + } + // being unable to set the version does not prevent the object from being extracted + versioner.UpdateObject(objPtr, uint64(rev)) + return nil +} + +// decodeList decodes a list of values into a list of objects, with resource version set to corresponding rev. +// On success, ListPtr would be set to the list of objects. 
+func decodeList(elems []*elemForDecode, filter storage.FilterFunc, ListPtr interface{}, codec runtime.Codec, versioner storage.Versioner) error {
+	// ListPtr must point at a slice; anything else is a programming error.
+	v, err := conversion.EnforcePtr(ListPtr)
+	if err != nil || v.Kind() != reflect.Slice {
+		panic("need ptr to slice")
+	}
+	for _, elem := range elems {
+		// Decode into a fresh value of the slice's element type.
+		obj, _, err := codec.Decode(elem.data, nil, reflect.New(v.Type().Elem()).Interface().(runtime.Object))
+		if err != nil {
+			return err
+		}
+		// being unable to set the version does not prevent the object from being extracted
+		versioner.UpdateObject(obj, elem.rev)
+		if filter(obj) {
+			v.Set(reflect.Append(v, reflect.ValueOf(obj).Elem()))
+		}
+	}
+	return nil
+}
+
+// checkPreconditions verifies that out satisfies preconditions. Nil
+// preconditions always pass. A UID mismatch is reported as an invalid-object
+// error for key; an object whose metadata cannot be read is reported as an
+// internal error.
+func checkPreconditions(key string, preconditions *storage.Preconditions, out runtime.Object) error {
+	if preconditions == nil {
+		return nil
+	}
+	objMeta, err := metav1.ObjectMetaFor(out)
+	if err != nil {
+		return storage.NewInternalErrorf("can't enforce preconditions %v on un-introspectable object %v, got error: %v", *preconditions, out, err)
+	}
+	if preconditions.UID != nil && *preconditions.UID != objMeta.UID {
+		errMsg := fmt.Sprintf("Precondition failed: UID in precondition: %v, UID in object meta: %v", *preconditions.UID, objMeta.UID)
+		return storage.NewInvalidObjError(key, errMsg)
+	}
+	return nil
+}
+
+// notFound returns a transaction comparison that holds when key's mod
+// revision is 0.
+func notFound(key string) clientv3.Cmp {
+	return clientv3.Compare(clientv3.ModRevision(key), "=", 0)
+}
diff --git a/pkg/storage/etcd3/store_test.go b/pkg/storage/etcd3/store_test.go
new file mode 100644
index 000000000..ce379a338
--- /dev/null
+++ b/pkg/storage/etcd3/store_test.go
@@ -0,0 +1,571 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package etcd3
+
+import (
+	"fmt"
+	"reflect"
+	"sync"
+	"testing"
+
+	apitesting "k8s.io/apimachinery/pkg/api/testing"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/serializer"
+	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/apiserver/pkg/apis/example"
+	examplev1 "k8s.io/apiserver/pkg/apis/example/v1"
+	"k8s.io/apiserver/pkg/storage"
+
+	"github.com/coreos/etcd/integration"
+	"golang.org/x/net/context"
+)
+
+// scheme and codecs are shared by every test in this file; init registers the
+// example API types with the scheme.
+var scheme = runtime.NewScheme()
+var codecs = serializer.NewCodecFactory(scheme)
+
+func init() {
+	metav1.AddToGroupVersion(scheme, metav1.SchemeGroupVersion)
+	example.AddToScheme(scheme)
+	examplev1.AddToScheme(scheme)
+}
+
+// TestCreate checks that Create writes the key to etcd and returns the object
+// with a resource version set.
+func TestCreate(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	etcdClient := cluster.RandClient()
+
+	key := "/testkey"
+	out := &example.Pod{}
+	obj := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
+
+	// verify that kv pair is empty before set
+	getResp, err := etcdClient.KV.Get(ctx, key)
+	if err != nil {
+		t.Fatalf("etcdClient.KV.Get failed: %v", err)
+	}
+	if len(getResp.Kvs) != 0 {
+		t.Fatalf("expecting empty result on key: %s", key)
+	}
+
+	err = store.Create(ctx, key, obj, out, 0)
+	if err != nil {
+		t.Fatalf("Create failed: %v", err)
+	}
+	// basic tests of the output
+	if obj.ObjectMeta.Name != out.ObjectMeta.Name {
+		t.Errorf("pod name want=%s, get=%s", obj.ObjectMeta.Name, out.ObjectMeta.Name)
+	}
+	if out.ResourceVersion == "" {
+		t.Errorf("output should have non-empty resource version")
+	}
+
+	// verify that kv pair is not empty after set
+	getResp, err = etcdClient.KV.Get(ctx, key)
+	if err != nil {
+		t.Fatalf("etcdClient.KV.Get failed: %v", err)
+	}
+	if len(getResp.Kvs) == 0 {
+		t.Fatalf("expecting non empty result on key: %s", key)
+	}
+}
+
+// TestCreateWithTTL checks that an object created with a TTL is later
+// reported as deleted on a watch.
+func TestCreateWithTTL(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+
+	input := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
+	key := "/somekey"
+
+	out := &example.Pod{}
+	if err := store.Create(ctx, key, input, out, 1); err != nil {
+		t.Fatalf("Create failed: %v", err)
+	}
+
+	w, err := store.Watch(ctx, key, out.ResourceVersion, storage.Everything)
+	if err != nil {
+		t.Fatalf("Watch failed: %v", err)
+	}
+	testCheckEventType(t, watch.Deleted, w)
+}
+
+// TestCreateWithKeyExist checks that creating an already existing key fails
+// with a node-exists error.
+func TestCreateWithKeyExist(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	obj := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
+	key, _ := testPropogateStore(ctx, t, store, obj)
+	out := &example.Pod{}
+	err := store.Create(ctx, key, obj, out, 0)
+	if err == nil || !storage.IsNodeExist(err) {
+		t.Errorf("expecting key exists error, but get: %v", err)
+	}
+}
+
+// TestGet covers Get on existing and missing keys, with and without
+// ignoreNotFound.
+func TestGet(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	key, storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}})
+
+	tests := []struct {
+		key               string
+		ignoreNotFound    bool
+		expectNotFoundErr bool
+		expectedOut       *example.Pod
+	}{{ // test get on existing item
+		key:               key,
+		ignoreNotFound:    false,
+		expectNotFoundErr: false,
+		expectedOut:       storedObj,
+	}, { // test get on non-existing item with ignoreNotFound=false
+		key:               "/non-existing",
+		ignoreNotFound:    false,
+		expectNotFoundErr: true,
+	}, { // test get on non-existing item with ignoreNotFound=true
+		key:               "/non-existing",
+		ignoreNotFound:    true,
+		expectNotFoundErr: false,
+		expectedOut:       &example.Pod{},
+	}}
+
+	for i, tt := range tests {
+		out := &example.Pod{}
+		err := store.Get(ctx, tt.key, "", out, tt.ignoreNotFound)
+		if tt.expectNotFoundErr {
+			if err == nil || !storage.IsNotFound(err) {
+				t.Errorf("#%d: expecting not found error, but get: %v", i, err)
+			}
+			continue
+		}
+		if err != nil {
+			t.Fatalf("Get failed: %v", err)
+		}
+		if !reflect.DeepEqual(tt.expectedOut, out) {
+			t.Errorf("#%d: pod want=%#v, get=%#v", i, tt.expectedOut, out)
+		}
+	}
+}
+
+// TestUnconditionalDelete covers Delete without preconditions on existing and
+// missing keys.
+func TestUnconditionalDelete(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	key, storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}})
+
+	tests := []struct {
+		key               string
+		expectedObj       *example.Pod
+		expectNotFoundErr bool
+	}{{ // test unconditional delete on existing key
+		key:               key,
+		expectedObj:       storedObj,
+		expectNotFoundErr: false,
+	}, { // test unconditional delete on non-existing key
+		key:               "/non-existing",
+		expectedObj:       nil,
+		expectNotFoundErr: true,
+	}}
+
+	for i, tt := range tests {
+		out := &example.Pod{} // reset
+		err := store.Delete(ctx, tt.key, out, nil)
+		if tt.expectNotFoundErr {
+			if err == nil || !storage.IsNotFound(err) {
+				t.Errorf("#%d: expecting not found error, but get: %v", i, err)
+			}
+			continue
+		}
+		if err != nil {
+			t.Fatalf("Delete failed: %v", err)
+		}
+		if !reflect.DeepEqual(tt.expectedObj, out) {
+			t.Errorf("#%d: pod want=%#v, get=%#v", i, tt.expectedObj, out)
+		}
+	}
+}
+
+// TestConditionalDelete covers Delete with matching and mismatching UID
+// preconditions.
+func TestConditionalDelete(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	key, storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: "A"}})
+
+	tests := []struct {
+		precondition        *storage.Preconditions
+		expectInvalidObjErr bool
+	}{{ // test conditional delete with UID match
+		precondition:        storage.NewUIDPreconditions("A"),
+		expectInvalidObjErr: false,
+	}, { // test conditional delete with UID mismatch
+		precondition:        storage.NewUIDPreconditions("B"),
+		expectInvalidObjErr: true,
+	}}
+
+	for i, tt := range tests {
+		out := &example.Pod{}
+		err := store.Delete(ctx, key, out, tt.precondition)
+		if tt.expectInvalidObjErr {
+			if err == nil || !storage.IsInvalidObj(err) {
+				t.Errorf("#%d: expecting invalid UID error, but get: %v", i, err)
+			}
+			continue
+		}
+		if err != nil {
+			t.Fatalf("Delete failed: %v", err)
+		}
+		if !reflect.DeepEqual(storedObj, out) {
+			t.Errorf("#%d: pod want=%#v, get=%#v", i, storedObj, out)
+		}
+		// The object was deleted; store it again for the next case.
+		key, storedObj = testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: "A"}})
+	}
+}
+
+// TestGetToList covers GetToList on existing and missing keys and with a
+// field selector that filters out the stored object.
+func TestGetToList(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	key, storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}})
+
+	tests := []struct {
+		key         string
+		pred        storage.SelectionPredicate
+		expectedOut []*example.Pod
+	}{{ // test GetToList on existing key
+		key:         key,
+		pred:        storage.Everything,
+		expectedOut: []*example.Pod{storedObj},
+	}, { // test GetToList on non-existing key
+		key:         "/non-existing",
+		pred:        storage.Everything,
+		expectedOut: nil,
+	}, { // test GetToList on existing key with a selector that excludes the stored pod's name
+		key: key,
+		pred: storage.SelectionPredicate{
+			Label: labels.Everything(),
+			Field: fields.ParseSelectorOrDie("metadata.name!=" + storedObj.Name),
+			GetAttrs: func(obj runtime.Object) (labels.Set, fields.Set, error) {
+				pod := obj.(*example.Pod)
+				return nil, fields.Set{"metadata.name": pod.Name}, nil
+			},
+		},
+		expectedOut: nil,
+	}}
+
+	for i, tt := range tests {
+		out := &example.PodList{}
+		err := store.GetToList(ctx, tt.key, "", tt.pred, out)
+		if err != nil {
+			t.Fatalf("GetToList failed: %v", err)
+		}
+		if len(out.Items) != len(tt.expectedOut) {
+			t.Errorf("#%d: length of list want=%d, get=%d", i, len(tt.expectedOut), len(out.Items))
+			continue
+		}
+		for j, wantPod := range tt.expectedOut {
+			getPod := &out.Items[j]
+			if !reflect.DeepEqual(wantPod, getPod) {
+				t.Errorf("#%d: pod want=%#v, get=%#v", i, wantPod, getPod)
+			}
+		}
+	}
+}
+
+// TestGuaranteedUpdate covers GuaranteedUpdate on missing and existing keys,
+// no-op updates, and UID preconditions. Cases run in order and each successful
+// case's output becomes the baseline (storeObj) for the next one.
+func TestGuaranteedUpdate(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	key, storeObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: "A"}})
+
+	tests := []struct {
+		key                 string
+		ignoreNotFound      bool
+		precondition        *storage.Preconditions
+		expectNotFoundErr   bool
+		expectInvalidObjErr bool
+		expectNoUpdate      bool
+	}{{ // GuaranteedUpdate on non-existing key with ignoreNotFound=false
+		key:                 "/non-existing",
+		ignoreNotFound:      false,
+		precondition:        nil,
+		expectNotFoundErr:   true,
+		expectInvalidObjErr: false,
+		expectNoUpdate:      false,
+	}, { // GuaranteedUpdate on non-existing key with ignoreNotFound=true
+		key:                 "/non-existing",
+		ignoreNotFound:      true,
+		precondition:        nil,
+		expectNotFoundErr:   false,
+		expectInvalidObjErr: false,
+		expectNoUpdate:      false,
+	}, { // GuaranteedUpdate on existing key
+		key:                 key,
+		ignoreNotFound:      false,
+		precondition:        nil,
+		expectNotFoundErr:   false,
+		expectInvalidObjErr: false,
+		expectNoUpdate:      false,
+	}, { // GuaranteedUpdate with same data
+		key:                 key,
+		ignoreNotFound:      false,
+		precondition:        nil,
+		expectNotFoundErr:   false,
+		expectInvalidObjErr: false,
+		expectNoUpdate:      true,
+	}, { // GuaranteedUpdate with UID match
+		key:                 key,
+		ignoreNotFound:      false,
+		precondition:        storage.NewUIDPreconditions("A"),
+		expectNotFoundErr:   false,
+		expectInvalidObjErr: false,
+		expectNoUpdate:      true,
+	}, { // GuaranteedUpdate with UID mismatch
+		key:                 key,
+		ignoreNotFound:      false,
+		precondition:        storage.NewUIDPreconditions("B"),
+		expectNotFoundErr:   false,
+		expectInvalidObjErr: true,
+		expectNoUpdate:      true,
+	}}
+
+	for i, tt := range tests {
+		out := &example.Pod{}
+		name := fmt.Sprintf("foo-%d", i)
+		if tt.expectNoUpdate {
+			name = storeObj.Name
+		}
+		version := storeObj.ResourceVersion
+		err := store.GuaranteedUpdate(ctx, tt.key, out, tt.ignoreNotFound, tt.precondition,
+			storage.SimpleUpdate(func(obj runtime.Object) (runtime.Object, error) {
+				if tt.expectNotFoundErr && tt.ignoreNotFound {
+					if pod := obj.(*example.Pod); pod.Name != "" {
+						t.Errorf("#%d: expecting zero value, but get=%#v", i, pod)
+					}
+				}
+				pod := *storeObj
+				pod.Name = name
+				return &pod, nil
+			}))
+
+		if tt.expectNotFoundErr {
+			if err == nil || !storage.IsNotFound(err) {
+				t.Errorf("#%d: expecting not found error, but get: %v", i, err)
+			}
+			continue
+		}
+		if tt.expectInvalidObjErr {
+			if err == nil || !storage.IsInvalidObj(err) {
+				t.Errorf("#%d: expecting invalid UID error, but get: %v", i, err)
+			}
+			continue
+		}
+		if err != nil {
+			t.Fatalf("GuaranteedUpdate failed: %v", err)
+		}
+		if out.ObjectMeta.Name != name {
+			t.Errorf("#%d: pod name want=%s, get=%s", i, name, out.ObjectMeta.Name)
+		}
+		switch tt.expectNoUpdate {
+		case true:
+			if version != out.ResourceVersion {
+				t.Errorf("#%d: expect no version change, before=%s, after=%s", i, version, out.ResourceVersion)
+			}
+		case false:
+			if version == out.ResourceVersion {
+				t.Errorf("#%d: expect version change, but get the same version=%s", i, version)
+			}
+		}
+		storeObj = out
+	}
+}
+
+// TestGuaranteedUpdateWithTTL checks that an object written by
+// GuaranteedUpdate with a TTL is later reported as deleted on a watch.
+func TestGuaranteedUpdateWithTTL(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+
+	input := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
+	key := "/somekey"
+
+	out := &example.Pod{}
+	err := store.GuaranteedUpdate(ctx, key, out, true, nil,
+		func(_ runtime.Object, _ storage.ResponseMeta) (runtime.Object, *uint64, error) {
+			ttl := uint64(1)
+			return input, &ttl, nil
+		})
+	if err != nil {
+		t.Fatalf("GuaranteedUpdate failed: %v", err)
+	}
+
+	w, err := store.Watch(ctx, key, out.ResourceVersion, storage.Everything)
+	if err != nil {
+		t.Fatalf("Watch failed: %v", err)
+	}
+	testCheckEventType(t, watch.Deleted, w)
+}
+
+// TestGuaranteedUpdateWithConflict forces two concurrent updates of the same
+// key to overlap and checks that the loser retries: its update function must
+// run twice.
+func TestGuaranteedUpdateWithConflict(t *testing.T) {
+	ctx, store, cluster := testSetup(t)
+	defer cluster.Terminate(t)
+	key, _ := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}})
+
+	errChan := make(chan error, 1)
+	var firstToFinish sync.WaitGroup
+	var secondToEnter sync.WaitGroup
+	firstToFinish.Add(1)
+	secondToEnter.Add(1)
+
+	// First update: waits inside its update function until the second update
+	// has read the same state, then commits.
+	go func() {
+		err := store.GuaranteedUpdate(ctx, key, &example.Pod{}, false, nil,
+			storage.SimpleUpdate(func(obj runtime.Object) (runtime.Object, error) {
+				pod := obj.(*example.Pod)
+				pod.Name = "foo-1"
+				secondToEnter.Wait()
+				return pod, nil
+			}))
+		firstToFinish.Done()
+		errChan <- err
+	}()
+
+	// Second update: on its first attempt it releases the first update and
+	// waits for it to finish, so its own write hits a conflict.
+	updateCount := 0
+	err := store.GuaranteedUpdate(ctx, key, &example.Pod{}, false, nil,
+		storage.SimpleUpdate(func(obj runtime.Object) (runtime.Object, error) {
+			if updateCount == 0 {
+				secondToEnter.Done()
+				firstToFinish.Wait()
+			}
+			updateCount++
+			pod := obj.(*example.Pod)
+			pod.Name = "foo-2"
+			return pod, nil
+		}))
+	if err != nil {
+		t.Fatalf("Second GuaranteedUpdate error %#v", err)
+	}
+	if err := <-errChan; err != nil {
+		t.Fatalf("First GuaranteedUpdate error %#v", err)
+	}
+
+	if updateCount != 2 {
+		t.Errorf("Should have conflict and called update func twice")
+	}
+}
+
+// TestList covers List on one- and two-level prefixes, a missing prefix, and
+// a field selector that filters out the stored object.
+func TestList(t *testing.T) {
+	codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion)
+	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
+	defer cluster.Terminate(t)
+	store := newStore(cluster.RandClient(), false, codec, "")
+	ctx := context.Background()
+
+	// Setup storage with the following structure:
+	//  /
+	//   - one-level/
+	//  |            - test
+	//  |
+	//   - two-level/
+	//               - 1/
+	//              |   - test
+	//              |
+	//               - 2/
+	//                  - test
+	preset := []struct {
+		key       string
+		obj       *example.Pod
+		storedObj *example.Pod
+	}{{
+		key: "/one-level/test",
+		obj: &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}},
+	}, {
+		key: "/two-level/1/test",
+		obj: &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}},
+	}, {
+		key: "/two-level/2/test",
+		obj: &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "bar"}},
+	}}
+
+	for i, ps := range preset {
+		preset[i].storedObj = &example.Pod{}
+		err := store.Create(ctx, ps.key, ps.obj, preset[i].storedObj, 0)
+		if err != nil {
+			t.Fatalf("Create failed: %v", err)
+		}
+	}
+
+	tests := []struct {
+		prefix      string
+		pred        storage.SelectionPredicate
+		expectedOut []*example.Pod
+	}{{ // test List on existing key
+		prefix:      "/one-level/",
+		pred:        storage.Everything,
+		expectedOut: []*example.Pod{preset[0].storedObj},
+	}, { // test List on non-existing key
+		prefix:      "/non-existing/",
+		pred:        storage.Everything,
+		expectedOut: nil,
+	}, { // test List with pod name matching
+		prefix: "/one-level/",
+		pred: storage.SelectionPredicate{
+			Label: labels.Everything(),
+			Field: fields.ParseSelectorOrDie("metadata.name!=" + preset[0].storedObj.Name),
+			GetAttrs: func(obj runtime.Object) (labels.Set, fields.Set, error) {
+				pod := obj.(*example.Pod)
+				return nil, fields.Set{"metadata.name": pod.Name}, nil
+			},
+		},
+		expectedOut: nil,
+	}, { // test List with multiple levels of directories and expect flattened result
+		prefix:      "/two-level/",
+		pred:        storage.Everything,
+		expectedOut: []*example.Pod{preset[1].storedObj, preset[2].storedObj},
+	}}
+
+	for i, tt := range tests {
+		out := &example.PodList{}
+		err := store.List(ctx, tt.prefix, "0", tt.pred, out)
+		if err != nil {
+			t.Fatalf("List failed: %v", err)
+		}
+		if len(tt.expectedOut) != len(out.Items) {
+			t.Errorf("#%d: length of list want=%d, get=%d", i, len(tt.expectedOut), len(out.Items))
+			continue
+		}
+		for j, wantPod := range tt.expectedOut {
+			getPod := &out.Items[j]
+			if !reflect.DeepEqual(wantPod, getPod) {
+				t.Errorf("#%d: pod want=%#v, get=%#v", i, wantPod, getPod)
+			}
+		}
+	}
+}
+
+// testSetup starts a single-member etcd v3 test cluster and returns a
+// background context, a store backed by that cluster, and the cluster itself.
+// The caller is responsible for terminating the cluster.
+func testSetup(t *testing.T) (context.Context, *store, *integration.ClusterV3) {
+	codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion)
+	cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
+	store := newStore(cluster.RandClient(), false, codec, "")
+	ctx := context.Background()
+	return ctx, store, cluster
+}
+
+// testPropogateStore populates the store with obj under the fixed key
+// "/testkey" and returns that key together with the object as stored.
+// It fails the test if the object cannot be created.
+func testPropogateStore(ctx context.Context, t *testing.T, store *store, obj *example.Pod) (string, *example.Pod) {
+	// Setup store with a key and grab the output for returning.
+	key := "/testkey"
+	setOutput := &example.Pod{}
+	err := store.Create(ctx, key, obj, setOutput, 0)
+	if err != nil {
+		t.Fatalf("Create failed: %v", err)
+	}
+	return key, setOutput
+}
diff --git a/pkg/storage/etcd3/watcher.go b/pkg/storage/etcd3/watcher.go
new file mode 100644
index 000000000..e75d4b981
--- /dev/null
+++ b/pkg/storage/etcd3/watcher.go
@@ -0,0 +1,375 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package etcd3
+
+import (
+	"fmt"
+	"net/http"
+	"strings"
+	"sync"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/apiserver/pkg/storage"
+
+	"github.com/coreos/etcd/clientv3"
+	etcdrpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
+	"github.com/golang/glog"
+	"golang.org/x/net/context"
+)
+
+const (
+	// We have set a buffer in order to reduce times of context switches.
+	incomingBufSize = 100
+	outgoingBufSize = 100
+)
+
+// watcher holds the etcd client, codec and versioner used by the watch
+// channels it creates.
+type watcher struct {
+	client    *clientv3.Client
+	codec     runtime.Codec
+	versioner storage.Versioner
+}
+
+// watchChan implements watch.Interface.
+type watchChan struct {
+	watcher    *watcher
+	key        string
+	initialRev int64 // revision to watch after; replaced by sync() when it starts at 0
+	recursive  bool
+	// internalFilter is nil when the predicate accepts every object.
+	internalFilter storage.FilterFunc
+	ctx            context.Context
+	cancel         context.CancelFunc
+	// incomingEventChan carries parsed etcd events to processEvent.
+	incomingEventChan chan *event
+	// resultChan carries the events handed to the user; closed by run().
+	resultChan chan watch.Event
+	// errChan carries the first error that terminates the watch.
+	errChan chan error
+}
+
+// newWatcher returns a watcher using the given client, codec and versioner.
+func newWatcher(client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner) *watcher {
+	return &watcher{
+		client:    client,
+		codec:     codec,
+		versioner: versioner,
+	}
+}
+
+// Watch watches on a key and returns a watch.Interface that transfers relevant notifications.
+// If rev is zero, it will return the existing object(s) and then start watching from
+// the revision following the one at which those objects were read.
+// If rev is non-zero, it will watch events happened after given revision.
+// If recursive is false, it watches on given key.
+// If recursive is true, it watches any children and directories under the key, excluding the root key itself.
+// pred must be non-nil. Only if pred matches the change, it will be returned.
+func (w *watcher) Watch(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) (watch.Interface, error) {
+	if recursive && !strings.HasSuffix(key, "/") {
+		key += "/"
+	}
+	wc := w.createWatchChan(ctx, key, rev, recursive, pred)
+	go wc.run()
+	return wc, nil
+}
+
+// createWatchChan builds a watchChan for the given parameters with its own
+// cancellable context derived from ctx. It does not start any goroutine.
+func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) *watchChan {
+	wc := &watchChan{
+		watcher:           w,
+		key:               key,
+		initialRev:        rev,
+		recursive:         recursive,
+		internalFilter:    storage.SimpleFilter(pred),
+		incomingEventChan: make(chan *event, incomingBufSize),
+		resultChan:        make(chan watch.Event, outgoingBufSize),
+		errChan:           make(chan error, 1),
+	}
+	if pred.Label.Empty() && pred.Field.Empty() {
+		// The filter doesn't filter out any object.
+		wc.internalFilter = nil
+	}
+	wc.ctx, wc.cancel = context.WithCancel(ctx)
+	return wc
+}
+
+// run starts the etcd watch and the event processor, waits until the watch
+// ends (error, etcd watch channel closed, or context cancelled), then cancels
+// the context, waits for the processor to exit and closes resultChan.
+func (wc *watchChan) run() {
+	watchClosedCh := make(chan struct{})
+	go wc.startWatching(watchClosedCh)
+
+	var resultChanWG sync.WaitGroup
+	resultChanWG.Add(1)
+	go wc.processEvent(&resultChanWG)
+
+	select {
+	case err := <-wc.errChan:
+		if err == context.Canceled {
+			break
+		}
+		errResult := parseError(err)
+		if errResult != nil {
+			// error result is guaranteed to be received by user before closing ResultChan.
+			select {
+			case wc.resultChan <- *errResult:
+			case <-wc.ctx.Done(): // user has given up all results
+			}
+		}
+	case <-watchClosedCh:
+	case <-wc.ctx.Done(): // user cancel
+	}
+
+	// We use wc.ctx to reap all goroutines. Under whatever condition, we should stop them all.
+	// It's fine to double cancel.
+	wc.cancel()
+
+	// we need to wait until resultChan wouldn't be used anymore
+	resultChanWG.Wait()
+	close(wc.resultChan)
+}
+
+// Stop cancels the watch's context, which makes run() shut everything down.
+func (wc *watchChan) Stop() {
+	wc.cancel()
+}
+
+// ResultChan returns the channel on which watch events are delivered.
+func (wc *watchChan) ResultChan() <-chan watch.Event {
+	return wc.resultChan
+}
+
+// sync tries to retrieve existing data and send them to process.
+// The revision to watch will be set to the revision in response.
+// All events sent will have isCreated=true
+func (wc *watchChan) sync() error {
+	opts := []clientv3.OpOption{}
+	if wc.recursive {
+		opts = append(opts, clientv3.WithPrefix())
+	}
+	getResp, err := wc.watcher.client.Get(wc.ctx, wc.key, opts...)
+	if err != nil {
+		return err
+	}
+	wc.initialRev = getResp.Header.Revision
+	for _, kv := range getResp.Kvs {
+		wc.sendEvent(parseKV(kv))
+	}
+	return nil
+}
+
+// startWatching does:
+// - get current objects if initialRev=0; set initialRev to current rev
+// - watch on given key and send events to process.
+// watchClosedCh is closed only when the etcd watch channel ends without an
+// error; on error the error is sent via sendError instead.
+func (wc *watchChan) startWatching(watchClosedCh chan struct{}) {
+	if wc.initialRev == 0 {
+		if err := wc.sync(); err != nil {
+			glog.Errorf("failed to sync with latest state: %v", err)
+			wc.sendError(err)
+			return
+		}
+	}
+	opts := []clientv3.OpOption{clientv3.WithRev(wc.initialRev + 1), clientv3.WithPrevKV()}
+	if wc.recursive {
+		opts = append(opts, clientv3.WithPrefix())
+	}
+	wch := wc.watcher.client.Watch(wc.ctx, wc.key, opts...)
+	for wres := range wch {
+		if wres.Err() != nil {
+			err := wres.Err()
+			// If there is an error on server (e.g. compaction), the channel will return it before closed.
+			glog.Errorf("watch chan error: %v", err)
+			wc.sendError(err)
+			return
+		}
+		for _, e := range wres.Events {
+			wc.sendEvent(parseEvent(e))
+		}
+	}
+	// When we come to this point, it's only possible that client side ends the watch.
+	// e.g. cancel the context, close the client.
+	// If this watch chan is broken and context isn't cancelled, other goroutines will still hang.
+	// We should notify the main thread that this goroutine has exited.
+	close(watchClosedCh)
+}
+
+// processEvent processes events from etcd watcher and sends results to resultChan.
+// It returns, marking wg done, once the watch's context is cancelled.
+func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	for {
+		select {
+		case e := <-wc.incomingEventChan:
+			res := wc.transform(e)
+			if res == nil {
+				continue
+			}
+			if len(wc.resultChan) == outgoingBufSize {
+				glog.Warningf("Fast watcher, slow processing. Number of buffered events: %d. "+
+					"Probably caused by slow dispatching events to watchers", outgoingBufSize)
+			}
+			// If user couldn't receive results fast enough, we also block incoming events from watcher.
+			// Because storing events in local will cause more memory usage.
+			// The worst case would be closing the fast watcher.
+			select {
+			case wc.resultChan <- *res:
+			case <-wc.ctx.Done():
+				return
+			}
+		case <-wc.ctx.Done():
+			return
+		}
+	}
+}
+
+// filter reports whether obj passes the watch's predicate. With no filter
+// configured every object passes.
+func (wc *watchChan) filter(obj runtime.Object) bool {
+	if wc.internalFilter == nil {
+		return true
+	}
+	return wc.internalFilter(obj)
+}
+
+// acceptAll reports whether the watch has no filter configured.
+func (wc *watchChan) acceptAll() bool {
+	return wc.internalFilter == nil
+}
+
+// transform transforms an event into a result for user if not filtered.
+// It returns nil when the event is filtered out or cannot be decoded; in the
+// latter case the error is also sent via sendError.
+func (wc *watchChan) transform(e *event) (res *watch.Event) {
+	curObj, oldObj, err := wc.prepareObjs(e)
+	if err != nil {
+		glog.Errorf("failed to prepare current and previous objects: %v", err)
+		wc.sendError(err)
+		return nil
+	}
+
+	switch {
+	case e.isDeleted:
+		if !wc.filter(oldObj) {
+			return nil
+		}
+		res = &watch.Event{
+			Type:   watch.Deleted,
+			Object: oldObj,
+		}
+	case e.isCreated:
+		if !wc.filter(curObj) {
+			return nil
+		}
+		res = &watch.Event{
+			Type:   watch.Added,
+			Object: curObj,
+		}
+	default:
+		if wc.acceptAll() {
+			res = &watch.Event{
+				Type:   watch.Modified,
+				Object: curObj,
+			}
+			return res
+		}
+		// With a filter, a modification is reported relative to the set of
+		// objects the user can see: entering the set is an add, leaving it
+		// is a delete, and a change outside the set is dropped (res stays nil).
+		curObjPasses := wc.filter(curObj)
+		oldObjPasses := wc.filter(oldObj)
+		switch {
+		case curObjPasses && oldObjPasses:
+			res = &watch.Event{
+				Type:   watch.Modified,
+				Object: curObj,
+			}
+		case curObjPasses && !oldObjPasses:
+			res = &watch.Event{
+				Type:   watch.Added,
+				Object: curObj,
+			}
+		case !curObjPasses && oldObjPasses:
+			res = &watch.Event{
+				Type:   watch.Deleted,
+				Object: oldObj,
+			}
+		}
+	}
+	return res
+}
+
+// parseError converts err into a watch error event carrying a metav1.Status:
+// a compaction error maps to 410 Gone / Expired, anything else to
+// 500 / InternalError.
+func parseError(err error) *watch.Event {
+	var status *metav1.Status
+	switch {
+	case err == etcdrpc.ErrCompacted:
+		status = &metav1.Status{
+			Status:  metav1.StatusFailure,
+			Message: err.Error(),
+			Code:    http.StatusGone,
+			Reason:  metav1.StatusReasonExpired,
+		}
+	default:
+		status = &metav1.Status{
+			Status:  metav1.StatusFailure,
+			Message: err.Error(),
+			Code:    http.StatusInternalServerError,
+			Reason:  metav1.StatusReasonInternalError,
+		}
+	}
+
+	return &watch.Event{
+		Type:   watch.Error,
+		Object: status,
+	}
+}
+
+// sendError hands err to run(), giving up if the watch is already cancelled.
+func (wc *watchChan) sendError(err error) {
+	select {
+	case wc.errChan <- err:
+	case <-wc.ctx.Done():
+	}
+}
+
+// sendEvent queues e for processEvent, giving up if the watch is cancelled.
+// It logs a warning when the incoming buffer is full.
+func (wc *watchChan) sendEvent(e *event) {
+	if len(wc.incomingEventChan) == incomingBufSize {
+		glog.Warningf("Fast watcher, slow processing. Number of buffered events: %d. "+
+			"Probably caused by slow decoding, user not receiving fast, or other processing logic",
+			incomingBufSize)
+	}
+	select {
+	case wc.incomingEventChan <- e:
+	case <-wc.ctx.Done():
+	}
+}
+
+// prepareObjs decodes the current and previous objects of e as needed.
+// curObj is nil for deletions; oldObj is nil when there is no previous value
+// or when it is not needed (see below).
+func (wc *watchChan) prepareObjs(e *event) (curObj runtime.Object, oldObj runtime.Object, err error) {
+	if !e.isDeleted {
+		curObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, e.value, e.rev)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+	// We need to decode prevValue, only if this is deletion event or
+	// the underlying filter doesn't accept all objects (otherwise we
+	// know that the filter for previous object will return true and
+	// we need the object only to compute whether it was filtered out
+	// before).
+	if len(e.prevValue) > 0 && (e.isDeleted || !wc.acceptAll()) {
+		// Note that this sends the *old* object with the etcd revision for the time at
+		// which it gets deleted.
+		oldObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, e.prevValue, e.rev)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+	return curObj, oldObj, nil
+}
+
+// decodeObj decodes data with codec and sets the resulting object's resource
+// version to rev. Unlike decode in store.go, failing to set the version is an
+// error here.
+func decodeObj(codec runtime.Codec, versioner storage.Versioner, data []byte, rev int64) (runtime.Object, error) {
+	obj, err := runtime.Decode(codec, data)
+	if err != nil {
+		return nil, err
+	}
+	// ensure resource version is set on the object we load from etcd
+	if err := versioner.UpdateObject(obj, uint64(rev)); err != nil {
+		return nil, fmt.Errorf("failure to version api object (%d) %#v: %v", rev, obj, err)
+	}
+	return obj, nil
+}
diff --git a/pkg/storage/etcd3/watcher_test.go b/pkg/storage/etcd3/watcher_test.go
new file mode 100644
index 000000000..753c546c3
--- /dev/null
+++ b/pkg/storage/etcd3/watcher_test.go
@@ -0,0 +1,375 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package etcd3 + +import ( + "errors" + "fmt" + "reflect" + "strconv" + "sync" + "testing" + "time" + + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/integration" + "golang.org/x/net/context" + + apitesting "k8s.io/apimachinery/pkg/api/testing" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/apiserver/pkg/apis/example" + examplev1 "k8s.io/apiserver/pkg/apis/example/v1" + "k8s.io/apiserver/pkg/storage" +) + +func TestWatch(t *testing.T) { + testWatch(t, false) +} + +func TestWatchList(t *testing.T) { + testWatch(t, true) +} + +// It tests that +// - first occurrence of objects should notify Add event +// - update should trigger Modified event +// - update that gets filtered should trigger Deleted event +func testWatch(t *testing.T, recursive bool) { + ctx, store, cluster := testSetup(t) + defer cluster.Terminate(t) + podFoo := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}} + podBar := &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "bar"}} + + tests := []struct { + key string + pred storage.SelectionPredicate + watchTests []*testWatchStruct + }{{ // create a key + key: "/somekey-1", + watchTests: []*testWatchStruct{{podFoo, true, watch.Added}}, + pred: storage.Everything, + }, { // create a key but obj gets filtered. 
Then update it with unfiltered obj + key: "/somekey-3", + watchTests: []*testWatchStruct{{podFoo, false, ""}, {podBar, true, watch.Added}}, + pred: storage.SelectionPredicate{ + Label: labels.Everything(), + Field: fields.ParseSelectorOrDie("metadata.name=bar"), + GetAttrs: func(obj runtime.Object) (labels.Set, fields.Set, error) { + pod := obj.(*example.Pod) + return nil, fields.Set{"metadata.name": pod.Name}, nil + }, + }, + }, { // update + key: "/somekey-4", + watchTests: []*testWatchStruct{{podFoo, true, watch.Added}, {podBar, true, watch.Modified}}, + pred: storage.Everything, + }, { // delete because of being filtered + key: "/somekey-5", + watchTests: []*testWatchStruct{{podFoo, true, watch.Added}, {podBar, true, watch.Deleted}}, + pred: storage.SelectionPredicate{ + Label: labels.Everything(), + Field: fields.ParseSelectorOrDie("metadata.name!=bar"), + GetAttrs: func(obj runtime.Object) (labels.Set, fields.Set, error) { + pod := obj.(*example.Pod) + return nil, fields.Set{"metadata.name": pod.Name}, nil + }, + }, + }} + for i, tt := range tests { + w, err := store.watch(ctx, tt.key, "0", tt.pred, recursive) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + var prevObj *example.Pod + for _, watchTest := range tt.watchTests { + out := &example.Pod{} + key := tt.key + if recursive { + key = key + "/item" + } + err := store.GuaranteedUpdate(ctx, key, out, true, nil, storage.SimpleUpdate( + func(runtime.Object) (runtime.Object, error) { + return watchTest.obj, nil + })) + if err != nil { + t.Fatalf("GuaranteedUpdate failed: %v", err) + } + if watchTest.expectEvent { + expectObj := out + if watchTest.watchType == watch.Deleted { + expectObj = prevObj + expectObj.ResourceVersion = out.ResourceVersion + } + testCheckResult(t, i, watchTest.watchType, w, expectObj) + } + prevObj = out + } + w.Stop() + testCheckStop(t, i, w) + } +} + +func TestDeleteTriggerWatch(t *testing.T) { + ctx, store, cluster := testSetup(t) + defer cluster.Terminate(t) + key, 
storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}) + w, err := store.Watch(ctx, key, storedObj.ResourceVersion, storage.Everything) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + if err := store.Delete(ctx, key, &example.Pod{}, nil); err != nil { + t.Fatalf("Delete failed: %v", err) + } + testCheckEventType(t, watch.Deleted, w) +} + +// TestWatchFromZero tests that +// - watch from 0 should sync up and grab the object added before +// - watch from 0 is able to return events for objects whose previous version has been compacted +func TestWatchFromZero(t *testing.T) { + ctx, store, cluster := testSetup(t) + defer cluster.Terminate(t) + key, storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "ns"}}) + + w, err := store.Watch(ctx, key, "0", storage.Everything) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + testCheckResult(t, 0, watch.Added, w, storedObj) + w.Stop() + + // Update + out := &example.Pod{} + err = store.GuaranteedUpdate(ctx, key, out, true, nil, storage.SimpleUpdate( + func(runtime.Object) (runtime.Object, error) { + return &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "ns", Annotations: map[string]string{"a": "1"}}}, nil + })) + if err != nil { + t.Fatalf("GuaranteedUpdate failed: %v", err) + } + + // Make sure when we watch from 0 we receive an ADDED event + w, err = store.Watch(ctx, key, "0", storage.Everything) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + testCheckResult(t, 1, watch.Added, w, out) + w.Stop() + + // Update again + out = &example.Pod{} + err = store.GuaranteedUpdate(ctx, key, out, true, nil, storage.SimpleUpdate( + func(runtime.Object) (runtime.Object, error) { + return &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "ns"}}, nil + })) + if err != nil { + t.Fatalf("GuaranteedUpdate failed: %v", err) + } + + // Compact previous versions + 
revToCompact, err := strconv.Atoi(out.ResourceVersion) + if err != nil { + t.Fatalf("Error converting %q to an int: %v", storedObj.ResourceVersion, err) + } + _, err = cluster.RandClient().Compact(ctx, int64(revToCompact), clientv3.WithCompactPhysical()) + if err != nil { + t.Fatalf("Error compacting: %v", err) + } + + // Make sure we can still watch from 0 and receive an ADDED event + w, err = store.Watch(ctx, key, "0", storage.Everything) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + testCheckResult(t, 2, watch.Added, w, out) +} + +// TestWatchFromNoneZero tests that +// - watch from non-0 should just watch changes after given version +func TestWatchFromNoneZero(t *testing.T) { + ctx, store, cluster := testSetup(t) + defer cluster.Terminate(t) + key, storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}) + + w, err := store.Watch(ctx, key, storedObj.ResourceVersion, storage.Everything) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + out := &example.Pod{} + store.GuaranteedUpdate(ctx, key, out, true, nil, storage.SimpleUpdate( + func(runtime.Object) (runtime.Object, error) { + return &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "bar"}}, err + })) + testCheckResult(t, 0, watch.Modified, w, out) +} + +func TestWatchError(t *testing.T) { + codec := &testCodec{apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion)} + cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + defer cluster.Terminate(t) + invalidStore := newStore(cluster.RandClient(), false, codec, "") + ctx := context.Background() + w, err := invalidStore.Watch(ctx, "/abc", "0", storage.Everything) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + validStore := newStore(cluster.RandClient(), false, codec, "") + validStore.GuaranteedUpdate(ctx, "/abc", &example.Pod{}, true, nil, storage.SimpleUpdate( + func(runtime.Object) (runtime.Object, error) { + return &example.Pod{ObjectMeta: 
metav1.ObjectMeta{Name: "foo"}}, nil + })) + testCheckEventType(t, watch.Error, w) +} + +func TestWatchContextCancel(t *testing.T) { + ctx, store, cluster := testSetup(t) + defer cluster.Terminate(t) + canceledCtx, cancel := context.WithCancel(ctx) + cancel() + // When we watch with a canceled context, we should detect that it's context canceled. + // We won't take it as error and also close the watcher. + w, err := store.watcher.Watch(canceledCtx, "/abc", 0, false, storage.Everything) + if err != nil { + t.Fatal(err) + } + + select { + case _, ok := <-w.ResultChan(): + if ok { + t.Error("ResultChan() should be closed") + } + case <-time.After(wait.ForeverTestTimeout): + t.Errorf("timeout after %v", wait.ForeverTestTimeout) + } +} + +func TestWatchErrResultNotBlockAfterCancel(t *testing.T) { + origCtx, store, cluster := testSetup(t) + defer cluster.Terminate(t) + ctx, cancel := context.WithCancel(origCtx) + w := store.watcher.createWatchChan(ctx, "/abc", 0, false, storage.Everything) + // make resultChan and errChan blocking to ensure ordering. + w.resultChan = make(chan watch.Event) + w.errChan = make(chan error) + // The event flow goes like: + // - first we send an error, it should block on resultChan. + // - Then we cancel ctx. The blocking on resultChan should be freed up + // and run() goroutine should return. 
+ var wg sync.WaitGroup + wg.Add(1) + go func() { + w.run() + wg.Done() + }() + w.errChan <- fmt.Errorf("some error") + cancel() + wg.Wait() +} + +func TestWatchDeleteEventObjectHaveLatestRV(t *testing.T) { + ctx, store, cluster := testSetup(t) + defer cluster.Terminate(t) + key, storedObj := testPropogateStore(ctx, t, store, &example.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}) + + w, err := store.Watch(ctx, key, storedObj.ResourceVersion, storage.Everything) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + etcdW := cluster.RandClient().Watch(ctx, "/", clientv3.WithPrefix()) + + if err := store.Delete(ctx, key, &example.Pod{}, &storage.Preconditions{}); err != nil { + t.Fatalf("Delete failed: %v", err) + } + + e := <-w.ResultChan() + watchedDeleteObj := e.Object.(*example.Pod) + var wres clientv3.WatchResponse + wres = <-etcdW + + watchedDeleteRev, err := storage.ParseWatchResourceVersion(watchedDeleteObj.ResourceVersion) + if err != nil { + t.Fatalf("ParseWatchResourceVersion failed: %v", err) + } + if int64(watchedDeleteRev) != wres.Events[0].Kv.ModRevision { + t.Errorf("Object from delete event have version: %v, should be the same as etcd delete's mod rev: %d", + watchedDeleteRev, wres.Events[0].Kv.ModRevision) + } +} + +type testWatchStruct struct { + obj *example.Pod + expectEvent bool + watchType watch.EventType +} + +type testCodec struct { + runtime.Codec +} + +func (c *testCodec) Decode(data []byte, defaults *schema.GroupVersionKind, into runtime.Object) (runtime.Object, *schema.GroupVersionKind, error) { + return nil, nil, errors.New("Expected decoding failure") +} + +func testCheckEventType(t *testing.T, expectEventType watch.EventType, w watch.Interface) { + select { + case res := <-w.ResultChan(): + if res.Type != expectEventType { + t.Errorf("event type want=%v, get=%v", expectEventType, res.Type) + } + case <-time.After(wait.ForeverTestTimeout): + t.Errorf("time out after waiting %v on ResultChan", wait.ForeverTestTimeout) + } +} + 
+func testCheckResult(t *testing.T, i int, expectEventType watch.EventType, w watch.Interface, expectObj *example.Pod) { + select { + case res := <-w.ResultChan(): + if res.Type != expectEventType { + t.Errorf("#%d: event type want=%v, get=%v", i, expectEventType, res.Type) + return + } + if !reflect.DeepEqual(expectObj, res.Object) { + t.Errorf("#%d: obj want=\n%#v\nget=\n%#v", i, expectObj, res.Object) + } + case <-time.After(wait.ForeverTestTimeout): + t.Errorf("#%d: time out after waiting %v on ResultChan", i, wait.ForeverTestTimeout) + } +} + +func testCheckStop(t *testing.T, i int, w watch.Interface) { + select { + case e, ok := <-w.ResultChan(): + if ok { + var obj string + switch e.Object.(type) { + case *example.Pod: + obj = e.Object.(*example.Pod).Name + case *metav1.Status: + obj = e.Object.(*metav1.Status).Message + } + t.Errorf("#%d: ResultChan should have been closed. Event: %s. Object: %s", i, e.Type, obj) + } + case <-time.After(wait.ForeverTestTimeout): + t.Errorf("#%d: time out after waiting 1s on ResultChan", i) + } +} diff --git a/pkg/storage/storagebackend/factory/etcd2.go b/pkg/storage/storagebackend/factory/etcd2.go new file mode 100644 index 000000000..c6e5f52c4 --- /dev/null +++ b/pkg/storage/storagebackend/factory/etcd2.go @@ -0,0 +1,81 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package factory + +import ( + "net" + "net/http" + "time" + + etcd2client "github.com/coreos/etcd/client" + "github.com/coreos/etcd/pkg/transport" + + utilnet "k8s.io/apimachinery/pkg/util/net" + "k8s.io/apiserver/pkg/storage" + "k8s.io/apiserver/pkg/storage/storagebackend" + "k8s.io/kubernetes/pkg/storage/etcd" +) + +func newETCD2Storage(c storagebackend.Config) (storage.Interface, DestroyFunc, error) { + tr, err := newTransportForETCD2(c.CertFile, c.KeyFile, c.CAFile) + if err != nil { + return nil, nil, err + } + client, err := newETCD2Client(tr, c.ServerList) + if err != nil { + return nil, nil, err + } + s := etcd.NewEtcdStorage(client, c.Codec, c.Prefix, c.Quorum, c.DeserializationCacheSize, c.Copier) + return s, tr.CloseIdleConnections, nil +} + +func newETCD2Client(tr *http.Transport, serverList []string) (etcd2client.Client, error) { + cli, err := etcd2client.New(etcd2client.Config{ + Endpoints: serverList, + Transport: tr, + }) + if err != nil { + return nil, err + } + + return cli, nil +} + +func newTransportForETCD2(certFile, keyFile, caFile string) (*http.Transport, error) { + info := transport.TLSInfo{ + CertFile: certFile, + KeyFile: keyFile, + CAFile: caFile, + } + cfg, err := info.ClientConfig() + if err != nil { + return nil, err + } + // Copied from etcd.DefaultTransport declaration. + // TODO: Determine if transport needs optimization + tr := utilnet.SetTransportDefaults(&http.Transport{ + Proxy: http.ProxyFromEnvironment, + Dial: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).Dial, + TLSHandshakeTimeout: 10 * time.Second, + MaxIdleConnsPerHost: 500, + TLSClientConfig: cfg, + }) + return tr, nil +} diff --git a/pkg/storage/storagebackend/factory/etcd3.go b/pkg/storage/storagebackend/factory/etcd3.go new file mode 100644 index 000000000..595474723 --- /dev/null +++ b/pkg/storage/storagebackend/factory/etcd3.go @@ -0,0 +1,62 @@ +/* +Copyright 2016 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package factory + +import ( + "k8s.io/apiserver/pkg/storage" + "k8s.io/apiserver/pkg/storage/storagebackend" + "k8s.io/kubernetes/pkg/storage/etcd3" + + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/pkg/transport" + "golang.org/x/net/context" +) + +func newETCD3Storage(c storagebackend.Config) (storage.Interface, DestroyFunc, error) { + tlsInfo := transport.TLSInfo{ + CertFile: c.CertFile, + KeyFile: c.KeyFile, + CAFile: c.CAFile, + } + tlsConfig, err := tlsInfo.ClientConfig() + if err != nil { + return nil, nil, err + } + // NOTE: Client relies on nil tlsConfig + // for non-secure connections, update the implicit variable + if len(c.CertFile) == 0 && len(c.KeyFile) == 0 && len(c.CAFile) == 0 { + tlsConfig = nil + } + cfg := clientv3.Config{ + Endpoints: c.ServerList, + TLS: tlsConfig, + } + client, err := clientv3.New(cfg) + if err != nil { + return nil, nil, err + } + ctx, cancel := context.WithCancel(context.Background()) + etcd3.StartCompactor(ctx, client) + destroyFunc := func() { + cancel() + client.Close() + } + if c.Quorum { + return etcd3.New(client, c.Codec, c.Prefix), destroyFunc, nil + } + return etcd3.NewWithNoQuorumRead(client, c.Codec, c.Prefix), destroyFunc, nil +} diff --git a/pkg/storage/storagebackend/factory/factory.go b/pkg/storage/storagebackend/factory/factory.go new file mode 100644 index 000000000..101207b9f --- /dev/null +++ b/pkg/storage/storagebackend/factory/factory.go @@ -0,0 +1,43 @@ +/* 
+Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package factory + +import ( + "fmt" + + "k8s.io/apiserver/pkg/storage" + "k8s.io/apiserver/pkg/storage/storagebackend" +) + +// DestroyFunc is to destroy any resources used by the storage returned in Create() together. +type DestroyFunc func() + +// Create creates a storage backend based on given config. +func Create(c storagebackend.Config) (storage.Interface, DestroyFunc, error) { + switch c.Type { + case storagebackend.StorageTypeETCD2: + return newETCD2Storage(c) + case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3: + // TODO: We have the following features to implement: + // - Support secure connection by using key, cert, and CA files. + // - Honor "https" scheme to support secure connection in gRPC. + // - Support non-quorum read. + return newETCD3Storage(c) + default: + return nil, nil, fmt.Errorf("unknown storage type: %s", c.Type) + } +} diff --git a/pkg/storage/storagebackend/factory/tls_test.go b/pkg/storage/storagebackend/factory/tls_test.go new file mode 100644 index 000000000..b13a944e2 --- /dev/null +++ b/pkg/storage/storagebackend/factory/tls_test.go @@ -0,0 +1,106 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package factory + +import ( + "io/ioutil" + "os" + "path" + "path/filepath" + "testing" + + "github.com/coreos/etcd/integration" + "github.com/coreos/etcd/pkg/transport" + "golang.org/x/net/context" + + apitesting "k8s.io/apimachinery/pkg/api/testing" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/apiserver/pkg/apis/example" + examplev1 "k8s.io/apiserver/pkg/apis/example/v1" + "k8s.io/apiserver/pkg/storage/storagebackend" + "k8s.io/kubernetes/pkg/storage/etcd/testing/testingcert" +) + +var scheme = runtime.NewScheme() +var codecs = serializer.NewCodecFactory(scheme) + +func init() { + metav1.AddToGroupVersion(scheme, metav1.SchemeGroupVersion) + example.AddToScheme(scheme) + examplev1.AddToScheme(scheme) +} + +func TestTLSConnection(t *testing.T) { + codec := apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion) + + certFile, keyFile, caFile := configureTLSCerts(t) + defer os.RemoveAll(filepath.Dir(certFile)) + + tlsInfo := &transport.TLSInfo{ + CertFile: certFile, + KeyFile: keyFile, + CAFile: caFile, + } + + cluster := integration.NewClusterV3(t, &integration.ClusterConfig{ + Size: 1, + ClientTLS: tlsInfo, + }) + defer cluster.Terminate(t) + + cfg := storagebackend.Config{ + Type: storagebackend.StorageTypeETCD3, + ServerList: []string{cluster.Members[0].GRPCAddr()}, + CertFile: certFile, + KeyFile: keyFile, + CAFile: caFile, + Codec: codec, + Copier: scheme, + } + storage, destroyFunc, err := newETCD3Storage(cfg) + defer destroyFunc() + if err != nil { + 
t.Fatal(err) + } + err = storage.Create(context.TODO(), "/abc", &example.Pod{}, nil, 0) + if err != nil { + t.Fatalf("Create failed: %v", err) + } +} + +func configureTLSCerts(t *testing.T) (certFile, keyFile, caFile string) { + baseDir := os.TempDir() + tempDir, err := ioutil.TempDir(baseDir, "etcd_certificates") + if err != nil { + t.Fatal(err) + } + certFile = path.Join(tempDir, "etcdcert.pem") + if err := ioutil.WriteFile(certFile, []byte(testingcert.CertFileContent), 0644); err != nil { + t.Fatal(err) + } + keyFile = path.Join(tempDir, "etcdkey.pem") + if err := ioutil.WriteFile(keyFile, []byte(testingcert.KeyFileContent), 0644); err != nil { + t.Fatal(err) + } + caFile = path.Join(tempDir, "ca.pem") + if err := ioutil.WriteFile(caFile, []byte(testingcert.CAFileContent), 0644); err != nil { + t.Fatal(err) + } + return certFile, keyFile, caFile +} diff --git a/pkg/storage/testing/OWNERS b/pkg/storage/testing/OWNERS new file mode 100755 index 000000000..604c71718 --- /dev/null +++ b/pkg/storage/testing/OWNERS @@ -0,0 +1,9 @@ +reviewers: +- smarterclayton +- wojtek-t +- liggitt +- erictune +- timothysc +- soltysh +- mml +- feihujiang diff --git a/pkg/storage/testing/types.generated.go b/pkg/storage/testing/types.generated.go new file mode 100644 index 000000000..cf7e8aef4 --- /dev/null +++ b/pkg/storage/testing/types.generated.go @@ -0,0 +1,423 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// ************************************************************ +// DO NOT EDIT. +// THIS FILE IS AUTO-GENERATED BY codecgen. +// ************************************************************ + +package testing + +import ( + "errors" + "fmt" + codec1978 "github.com/ugorji/go/codec" + pkg1_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + pkg2_types "k8s.io/apimachinery/pkg/types" + "reflect" + "runtime" + time "time" +) + +const ( + // ----- content types ---- + codecSelferC_UTF81234 = 1 + codecSelferC_RAW1234 = 0 + // ----- value types used ---- + codecSelferValueTypeArray1234 = 10 + codecSelferValueTypeMap1234 = 9 + // ----- containerStateValues ---- + codecSelfer_containerMapKey1234 = 2 + codecSelfer_containerMapValue1234 = 3 + codecSelfer_containerMapEnd1234 = 4 + codecSelfer_containerArrayElem1234 = 6 + codecSelfer_containerArrayEnd1234 = 7 +) + +var ( + codecSelferBitsize1234 = uint8(reflect.TypeOf(uint(0)).Bits()) + codecSelferOnlyMapOrArrayEncodeToStructErr1234 = errors.New(`only encoded map or array can be decoded into a struct`) +) + +type codecSelfer1234 struct{} + +func init() { + if codec1978.GenVersion != 5 { + _, file, _, _ := runtime.Caller(0) + err := fmt.Errorf("codecgen version mismatch: current: %v, need %v. 
Re-generate file: %v", + 5, codec1978.GenVersion, file) + panic(err) + } + if false { // reference the types, but skip this branch at build/run time + var v0 pkg1_v1.TypeMeta + var v1 pkg2_types.UID + var v2 time.Time + _, _, _ = v0, v1, v2 + } +} + +func (x *TestResource) CodecEncodeSelf(e *codec1978.Encoder) { + var h codecSelfer1234 + z, r := codec1978.GenHelperEncoder(e) + _, _, _ = h, z, r + if x == nil { + r.EncodeNil() + } else { + yym1 := z.EncBinary() + _ = yym1 + if false { + } else if z.HasExtensions() && z.EncExt(x) { + } else { + yysep2 := !z.EncBinary() + yy2arr2 := z.EncBasicHandle().StructToArray + var yyq2 [4]bool + _, _, _ = yysep2, yyq2, yy2arr2 + const yyr2 bool = false + yyq2[0] = x.Kind != "" + yyq2[1] = x.APIVersion != "" + var yynn2 int + if yyr2 || yy2arr2 { + r.EncodeArrayStart(4) + } else { + yynn2 = 2 + for _, b := range yyq2 { + if b { + yynn2++ + } + } + r.EncodeMapStart(yynn2) + yynn2 = 0 + } + if yyr2 || yy2arr2 { + z.EncSendContainerState(codecSelfer_containerArrayElem1234) + if yyq2[0] { + yym4 := z.EncBinary() + _ = yym4 + if false { + } else { + r.EncodeString(codecSelferC_UTF81234, string(x.Kind)) + } + } else { + r.EncodeString(codecSelferC_UTF81234, "") + } + } else { + if yyq2[0] { + z.EncSendContainerState(codecSelfer_containerMapKey1234) + r.EncodeString(codecSelferC_UTF81234, string("kind")) + z.EncSendContainerState(codecSelfer_containerMapValue1234) + yym5 := z.EncBinary() + _ = yym5 + if false { + } else { + r.EncodeString(codecSelferC_UTF81234, string(x.Kind)) + } + } + } + if yyr2 || yy2arr2 { + z.EncSendContainerState(codecSelfer_containerArrayElem1234) + if yyq2[1] { + yym7 := z.EncBinary() + _ = yym7 + if false { + } else { + r.EncodeString(codecSelferC_UTF81234, string(x.APIVersion)) + } + } else { + r.EncodeString(codecSelferC_UTF81234, "") + } + } else { + if yyq2[1] { + z.EncSendContainerState(codecSelfer_containerMapKey1234) + r.EncodeString(codecSelferC_UTF81234, string("apiVersion")) + 
z.EncSendContainerState(codecSelfer_containerMapValue1234) + yym8 := z.EncBinary() + _ = yym8 + if false { + } else { + r.EncodeString(codecSelferC_UTF81234, string(x.APIVersion)) + } + } + } + if yyr2 || yy2arr2 { + z.EncSendContainerState(codecSelfer_containerArrayElem1234) + yy10 := &x.ObjectMeta + yym11 := z.EncBinary() + _ = yym11 + if false { + } else if z.HasExtensions() && z.EncExt(yy10) { + } else { + z.EncFallback(yy10) + } + } else { + z.EncSendContainerState(codecSelfer_containerMapKey1234) + r.EncodeString(codecSelferC_UTF81234, string("metadata")) + z.EncSendContainerState(codecSelfer_containerMapValue1234) + yy12 := &x.ObjectMeta + yym13 := z.EncBinary() + _ = yym13 + if false { + } else if z.HasExtensions() && z.EncExt(yy12) { + } else { + z.EncFallback(yy12) + } + } + if yyr2 || yy2arr2 { + z.EncSendContainerState(codecSelfer_containerArrayElem1234) + yym15 := z.EncBinary() + _ = yym15 + if false { + } else { + r.EncodeInt(int64(x.Value)) + } + } else { + z.EncSendContainerState(codecSelfer_containerMapKey1234) + r.EncodeString(codecSelferC_UTF81234, string("value")) + z.EncSendContainerState(codecSelfer_containerMapValue1234) + yym16 := z.EncBinary() + _ = yym16 + if false { + } else { + r.EncodeInt(int64(x.Value)) + } + } + if yyr2 || yy2arr2 { + z.EncSendContainerState(codecSelfer_containerArrayEnd1234) + } else { + z.EncSendContainerState(codecSelfer_containerMapEnd1234) + } + } + } +} + +func (x *TestResource) CodecDecodeSelf(d *codec1978.Decoder) { + var h codecSelfer1234 + z, r := codec1978.GenHelperDecoder(d) + _, _, _ = h, z, r + yym1 := z.DecBinary() + _ = yym1 + if false { + } else if z.HasExtensions() && z.DecExt(x) { + } else { + yyct2 := r.ContainerType() + if yyct2 == codecSelferValueTypeMap1234 { + yyl2 := r.ReadMapStart() + if yyl2 == 0 { + z.DecSendContainerState(codecSelfer_containerMapEnd1234) + } else { + x.codecDecodeSelfFromMap(yyl2, d) + } + } else if yyct2 == codecSelferValueTypeArray1234 { + yyl2 := r.ReadArrayStart() + if 
yyl2 == 0 { + z.DecSendContainerState(codecSelfer_containerArrayEnd1234) + } else { + x.codecDecodeSelfFromArray(yyl2, d) + } + } else { + panic(codecSelferOnlyMapOrArrayEncodeToStructErr1234) + } + } +} + +func (x *TestResource) codecDecodeSelfFromMap(l int, d *codec1978.Decoder) { + var h codecSelfer1234 + z, r := codec1978.GenHelperDecoder(d) + _, _, _ = h, z, r + var yys3Slc = z.DecScratchBuffer() // default slice to decode into + _ = yys3Slc + var yyhl3 bool = l >= 0 + for yyj3 := 0; ; yyj3++ { + if yyhl3 { + if yyj3 >= l { + break + } + } else { + if r.CheckBreak() { + break + } + } + z.DecSendContainerState(codecSelfer_containerMapKey1234) + yys3Slc = r.DecodeBytes(yys3Slc, true, true) + yys3 := string(yys3Slc) + z.DecSendContainerState(codecSelfer_containerMapValue1234) + switch yys3 { + case "kind": + if r.TryDecodeAsNil() { + x.Kind = "" + } else { + yyv4 := &x.Kind + yym5 := z.DecBinary() + _ = yym5 + if false { + } else { + *((*string)(yyv4)) = r.DecodeString() + } + } + case "apiVersion": + if r.TryDecodeAsNil() { + x.APIVersion = "" + } else { + yyv6 := &x.APIVersion + yym7 := z.DecBinary() + _ = yym7 + if false { + } else { + *((*string)(yyv6)) = r.DecodeString() + } + } + case "metadata": + if r.TryDecodeAsNil() { + x.ObjectMeta = pkg1_v1.ObjectMeta{} + } else { + yyv8 := &x.ObjectMeta + yym9 := z.DecBinary() + _ = yym9 + if false { + } else if z.HasExtensions() && z.DecExt(yyv8) { + } else { + z.DecFallback(yyv8, false) + } + } + case "value": + if r.TryDecodeAsNil() { + x.Value = 0 + } else { + yyv10 := &x.Value + yym11 := z.DecBinary() + _ = yym11 + if false { + } else { + *((*int)(yyv10)) = int(r.DecodeInt(codecSelferBitsize1234)) + } + } + default: + z.DecStructFieldNotFound(-1, yys3) + } // end switch yys3 + } // end for yyj3 + z.DecSendContainerState(codecSelfer_containerMapEnd1234) +} + +func (x *TestResource) codecDecodeSelfFromArray(l int, d *codec1978.Decoder) { + var h codecSelfer1234 + z, r := codec1978.GenHelperDecoder(d) + _, _, _ = 
h, z, r + var yyj12 int + var yyb12 bool + var yyhl12 bool = l >= 0 + yyj12++ + if yyhl12 { + yyb12 = yyj12 > l + } else { + yyb12 = r.CheckBreak() + } + if yyb12 { + z.DecSendContainerState(codecSelfer_containerArrayEnd1234) + return + } + z.DecSendContainerState(codecSelfer_containerArrayElem1234) + if r.TryDecodeAsNil() { + x.Kind = "" + } else { + yyv13 := &x.Kind + yym14 := z.DecBinary() + _ = yym14 + if false { + } else { + *((*string)(yyv13)) = r.DecodeString() + } + } + yyj12++ + if yyhl12 { + yyb12 = yyj12 > l + } else { + yyb12 = r.CheckBreak() + } + if yyb12 { + z.DecSendContainerState(codecSelfer_containerArrayEnd1234) + return + } + z.DecSendContainerState(codecSelfer_containerArrayElem1234) + if r.TryDecodeAsNil() { + x.APIVersion = "" + } else { + yyv15 := &x.APIVersion + yym16 := z.DecBinary() + _ = yym16 + if false { + } else { + *((*string)(yyv15)) = r.DecodeString() + } + } + yyj12++ + if yyhl12 { + yyb12 = yyj12 > l + } else { + yyb12 = r.CheckBreak() + } + if yyb12 { + z.DecSendContainerState(codecSelfer_containerArrayEnd1234) + return + } + z.DecSendContainerState(codecSelfer_containerArrayElem1234) + if r.TryDecodeAsNil() { + x.ObjectMeta = pkg1_v1.ObjectMeta{} + } else { + yyv17 := &x.ObjectMeta + yym18 := z.DecBinary() + _ = yym18 + if false { + } else if z.HasExtensions() && z.DecExt(yyv17) { + } else { + z.DecFallback(yyv17, false) + } + } + yyj12++ + if yyhl12 { + yyb12 = yyj12 > l + } else { + yyb12 = r.CheckBreak() + } + if yyb12 { + z.DecSendContainerState(codecSelfer_containerArrayEnd1234) + return + } + z.DecSendContainerState(codecSelfer_containerArrayElem1234) + if r.TryDecodeAsNil() { + x.Value = 0 + } else { + yyv19 := &x.Value + yym20 := z.DecBinary() + _ = yym20 + if false { + } else { + *((*int)(yyv19)) = int(r.DecodeInt(codecSelferBitsize1234)) + } + } + for { + yyj12++ + if yyhl12 { + yyb12 = yyj12 > l + } else { + yyb12 = r.CheckBreak() + } + if yyb12 { + break + } + 
z.DecSendContainerState(codecSelfer_containerArrayElem1234) + z.DecStructFieldNotFound(yyj12-1, "") + } + z.DecSendContainerState(codecSelfer_containerArrayEnd1234) +} diff --git a/pkg/storage/testing/types.go b/pkg/storage/testing/types.go new file mode 100644 index 000000000..6606f2e8e --- /dev/null +++ b/pkg/storage/testing/types.go @@ -0,0 +1,30 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testing + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +type TestResource struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata"` + Value int `json:"value"` +} + +func (obj *TestResource) GetObjectKind() schema.ObjectKind { return &obj.TypeMeta } diff --git a/pkg/storage/testing/utils.go b/pkg/storage/testing/utils.go new file mode 100644 index 000000000..f6718dd52 --- /dev/null +++ b/pkg/storage/testing/utils.go @@ -0,0 +1,61 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testing + +import ( + "path" + + "golang.org/x/net/context" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apiserver/pkg/storage" +) + +// CreateObj creates a single object under name by calling helper.Create with a TODO context. +func CreateObj(helper storage.Interface, name string, obj, out runtime.Object, ttl uint64) error { + return helper.Create(context.TODO(), name, obj, out, ttl) +} + +// CreateObjList creates every object in items via CreateObj, using prefix joined with the object's metadata name as the key. +func CreateObjList(prefix string, helper storage.Interface, items []runtime.Object) error { + for i := range items { + obj := items[i] + meta, err := meta.Accessor(obj) + if err != nil { + return err + } + err = CreateObj(helper, path.Join(prefix, meta.GetName()), obj, obj, 0) + if err != nil { + return err + } + items[i] = obj + } + return nil +} + +// CreateList extracts the items of list, creates them with CreateObjList, and sets the items back on list. +func CreateList(prefix string, helper storage.Interface, list runtime.Object) error { + items, err := meta.ExtractList(list) + if err != nil { + return err + } + err = CreateObjList(prefix, helper, items) + if err != nil { + return err + } + return meta.SetList(list, items) +} diff --git a/pkg/storage/tests/cacher_test.go b/pkg/storage/tests/cacher_test.go new file mode 100644 index 000000000..7cebc6e07 --- /dev/null +++ b/pkg/storage/tests/cacher_test.go @@ -0,0 +1,579 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tests + +import ( + "fmt" + "reflect" + goruntime "runtime" + "strconv" + "testing" + "time" + + apiequality "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + apitesting "k8s.io/apimachinery/pkg/api/testing" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/apiserver/pkg/apis/example" + examplev1 "k8s.io/apiserver/pkg/apis/example/v1" + "k8s.io/apiserver/pkg/storage" + etcdstorage "k8s.io/kubernetes/pkg/storage/etcd" + "k8s.io/kubernetes/pkg/storage/etcd/etcdtest" + etcdtesting "k8s.io/kubernetes/pkg/storage/etcd/testing" + "k8s.io/kubernetes/pkg/storage/etcd3" + + "golang.org/x/net/context" + + "k8s.io/apimachinery/pkg/runtime/serializer" + _ "k8s.io/client-go/pkg/api/install" +) + +var ( + scheme = runtime.NewScheme() + codecs = serializer.NewCodecFactory(scheme) +) + +func init() { + metav1.AddToGroupVersion(scheme, metav1.SchemeGroupVersion) + example.AddToScheme(scheme) + examplev1.AddToScheme(scheme) +} + +// GetAttrs returns labels and fields of a given object for filtering purposes. +func GetAttrs(obj runtime.Object) (labels.Set, fields.Set, error) { + pod, ok := obj.(*example.Pod) + if !ok { + return nil, nil, fmt.Errorf("not a pod") + } + return labels.Set(pod.ObjectMeta.Labels), PodToSelectableFields(pod), nil +} + +// PodToSelectableFields returns a field set that represents the object +// TODO: fields are not labels, and the validation rules for them do not apply. 
+func PodToSelectableFields(pod *example.Pod) fields.Set { + // The purpose of allocation with a given number of elements is to reduce + // amount of allocations needed to create the fields.Set. If you add any + // field here or the number of object-meta related fields changes, this should + // be adjusted. + podSpecificFieldsSet := make(fields.Set, 5) + podSpecificFieldsSet["spec.nodeName"] = pod.Spec.NodeName + podSpecificFieldsSet["spec.restartPolicy"] = string(pod.Spec.RestartPolicy) + podSpecificFieldsSet["status.phase"] = string(pod.Status.Phase) + return AddObjectMetaFieldsSet(podSpecificFieldsSet, &pod.ObjectMeta, true) +} + +func AddObjectMetaFieldsSet(source fields.Set, objectMeta *metav1.ObjectMeta, hasNamespaceField bool) fields.Set { + source["metadata.name"] = objectMeta.Name + if hasNamespaceField { + source["metadata.namespace"] = objectMeta.Namespace + } + return source +} + +func newEtcdTestStorage(t *testing.T, prefix string) (*etcdtesting.EtcdTestServer, storage.Interface) { + server, _ := etcdtesting.NewUnsecuredEtcd3TestClientServer(t, scheme) + storage := etcd3.New(server.V3Client, apitesting.TestCodec(codecs, examplev1.SchemeGroupVersion), prefix) + return server, storage +} + +func newTestCacher(s storage.Interface, cap int) *storage.Cacher { + prefix := "pods" + config := storage.CacherConfig{ + CacheCapacity: cap, + Storage: s, + Versioner: etcdstorage.APIObjectVersioner{}, + Copier: scheme, + Type: &example.Pod{}, + ResourcePrefix: prefix, + KeyFunc: func(obj runtime.Object) (string, error) { return storage.NamespaceKeyFunc(prefix, obj) }, + GetAttrsFunc: GetAttrs, + NewListFunc: func() runtime.Object { return &example.PodList{} }, + Codec: codecs.LegacyCodec(examplev1.SchemeGroupVersion), + } + return storage.NewCacherFromConfig(config) +} + +func makeTestPod(name string) *example.Pod { + return &example.Pod{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: name}, + Spec: DeepEqualSafePodSpec(), + } +} + +func updatePod(t 
*testing.T, s storage.Interface, obj, old *example.Pod) *example.Pod { + updateFn := func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + newObj, err := scheme.DeepCopy(obj) + if err != nil { + t.Errorf("unexpected error: %v", err) + return nil, nil, err + } + return newObj.(*example.Pod), nil, nil + } + key := "pods/" + obj.Namespace + "/" + obj.Name + if err := s.GuaranteedUpdate(context.TODO(), key, &example.Pod{}, old == nil, nil, updateFn); err != nil { + t.Errorf("unexpected error: %v", err) + } + obj.ResourceVersion = "" + result := &example.Pod{} + if err := s.Get(context.TODO(), key, "", result, false); err != nil { + t.Errorf("unexpected error: %v", err) + } + return result +} + +func TestGet(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 10) + defer cacher.Stop() + + podFoo := makeTestPod("foo") + fooCreated := updatePod(t, etcdStorage, podFoo, nil) + + // We pass the ResourceVersion from the above Create() operation. 
+ result := &example.Pod{} + if err := cacher.Get(context.TODO(), "pods/ns/foo", fooCreated.ResourceVersion, result, true); err != nil { + t.Errorf("Unexpected error: %v", err) + } + if e, a := *fooCreated, *result; !reflect.DeepEqual(e, a) { + t.Errorf("Expected: %#v, got: %#v", e, a) + } + + if err := cacher.Get(context.TODO(), "pods/ns/bar", fooCreated.ResourceVersion, result, true); err != nil { + t.Errorf("Unexpected error: %v", err) + } + emptyPod := example.Pod{} + if e, a := emptyPod, *result; !reflect.DeepEqual(e, a) { + t.Errorf("Expected: %#v, got: %#v", e, a) + } + + if err := cacher.Get(context.TODO(), "pods/ns/bar", fooCreated.ResourceVersion, result, false); !storage.IsNotFound(err) { + t.Errorf("Unexpected error: %v", err) + } +} + +func TestList(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 10) + defer cacher.Stop() + + podFoo := makeTestPod("foo") + podBar := makeTestPod("bar") + podBaz := makeTestPod("baz") + + podFooPrime := makeTestPod("foo") + podFooPrime.Spec.NodeName = "fakeNode" + + fooCreated := updatePod(t, etcdStorage, podFoo, nil) + _ = updatePod(t, etcdStorage, podBar, nil) + _ = updatePod(t, etcdStorage, podBaz, nil) + + _ = updatePod(t, etcdStorage, podFooPrime, fooCreated) + + // Create a pod in a namespace that contains "ns" as a prefix + // Make sure it is not returned in a watch of "ns" + podFooNS2 := makeTestPod("foo") + podFooNS2.Namespace += "2" + updatePod(t, etcdStorage, podFooNS2, nil) + + deleted := example.Pod{} + if err := etcdStorage.Delete(context.TODO(), "pods/ns/bar", &deleted, nil); err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // We first List directly from etcd by passing empty resourceVersion, + // to get the current etcd resourceVersion. 
+ rvResult := &example.PodList{} + if err := cacher.List(context.TODO(), "pods/ns", "", storage.Everything, rvResult); err != nil { + t.Errorf("Unexpected error: %v", err) + } + deletedPodRV := rvResult.ListMeta.ResourceVersion + + result := &example.PodList{} + // We pass the current etcd ResourceVersion received from the above List() operation, + // since there is no easy way to get the ResourceVersion of the barPod deletion operation. + if err := cacher.List(context.TODO(), "pods/ns", deletedPodRV, storage.Everything, result); err != nil { + t.Errorf("Unexpected error: %v", err) + } + if result.ListMeta.ResourceVersion != deletedPodRV { + t.Errorf("Incorrect resource version: %v", result.ListMeta.ResourceVersion) + } + if len(result.Items) != 2 { + t.Errorf("Unexpected list result: %d", len(result.Items)) + } + keys := sets.String{} + for _, item := range result.Items { + keys.Insert(item.Name) + } + if !keys.HasAll("foo", "baz") { + t.Errorf("Unexpected list result: %#v", result) + } + for _, item := range result.Items { + // unset fields that are set by the infrastructure + item.ResourceVersion = "" + item.CreationTimestamp = metav1.Time{} + + if item.Namespace != "ns" { + t.Errorf("Unexpected namespace: %s", item.Namespace) + } + + var expected *example.Pod + switch item.Name { + case "foo": + expected = podFooPrime + case "baz": + expected = podBaz + default: + t.Errorf("Unexpected item: %v", item) + } + if e, a := *expected, item; !reflect.DeepEqual(e, a) { + t.Errorf("Expected: %#v, got: %#v", e, a) + } + } +} + +func TestInfiniteList(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 10) + defer cacher.Stop() + + podFoo := makeTestPod("foo") + fooCreated := updatePod(t, etcdStorage, podFoo, nil) + + // Set up List at fooCreated.ResourceVersion + 10 + rv, err := storage.ParseWatchResourceVersion(fooCreated.ResourceVersion) + if err != nil { + t.Fatalf("Unexpected
error: %v", err) + } + listRV := strconv.Itoa(int(rv + 10)) + + result := &example.PodList{} + err = cacher.List(context.TODO(), "pods/ns", listRV, storage.Everything, result) + if !errors.IsTimeout(err) { + t.Errorf("Unexpected error: %v", err) + } +} + +func verifyWatchEvent(t *testing.T, w watch.Interface, eventType watch.EventType, eventObject runtime.Object) { + _, _, line, _ := goruntime.Caller(1) + select { + case event := <-w.ResultChan(): + if e, a := eventType, event.Type; e != a { + t.Logf("(called from line %d)", line) + t.Errorf("Expected: %s, got: %s", eventType, event.Type) + } + if e, a := eventObject, event.Object; !apiequality.Semantic.DeepDerivative(e, a) { + t.Logf("(called from line %d)", line) + t.Errorf("Expected (%s): %#v, got: %#v", eventType, e, a) + } + case <-time.After(wait.ForeverTestTimeout): + t.Logf("(called from line %d)", line) + t.Errorf("Timed out waiting for an event") + } +} + +type injectListError struct { + errors int + storage.Interface +} + +func (self *injectListError) List(ctx context.Context, key string, resourceVersion string, p storage.SelectionPredicate, listObj runtime.Object) error { + if self.errors > 0 { + self.errors-- + return fmt.Errorf("injected error") + } + return self.Interface.List(ctx, key, resourceVersion, p, listObj) +} + +func TestWatch(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + // Inject one list error to make sure we test the relist case. 
+ etcdStorage = &injectListError{errors: 1, Interface: etcdStorage} + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 3) // small capacity to trigger "too old version" error + defer cacher.Stop() + + podFoo := makeTestPod("foo") + podBar := makeTestPod("bar") + + podFooPrime := makeTestPod("foo") + podFooPrime.Spec.NodeName = "fakeNode" + + podFooBis := makeTestPod("foo") + podFooBis.Spec.NodeName = "anotherFakeNode" + + podFooNS2 := makeTestPod("foo") + podFooNS2.Namespace += "2" + + // initialVersion is used to initiate the watcher at the beginning of the world, + // which is not defined precisely in etcd. + initialVersion, err := cacher.LastSyncResourceVersion() + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + startVersion := strconv.Itoa(int(initialVersion)) + + // Set up Watch for object "podFoo". + watcher, err := cacher.Watch(context.TODO(), "pods/ns/foo", startVersion, storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watcher.Stop() + + // Create in another namespace first to make sure events from other namespaces don't get delivered + updatePod(t, etcdStorage, podFooNS2, nil) + + fooCreated := updatePod(t, etcdStorage, podFoo, nil) + _ = updatePod(t, etcdStorage, podBar, nil) + fooUpdated := updatePod(t, etcdStorage, podFooPrime, fooCreated) + + verifyWatchEvent(t, watcher, watch.Added, podFoo) + verifyWatchEvent(t, watcher, watch.Modified, podFooPrime) + + // Check whether we get too-old error via the watch channel + tooOldWatcher, err := cacher.Watch(context.TODO(), "pods/ns/foo", "1", storage.Everything) + if err != nil { + t.Fatalf("Expected no direct error, got %v", err) + } + defer tooOldWatcher.Stop() + // Ensure we get a "Gone" error + expectedGoneError := errors.NewGone("").ErrStatus + verifyWatchEvent(t, tooOldWatcher, watch.Error, &expectedGoneError) + + initialWatcher, err := cacher.Watch(context.TODO(), "pods/ns/foo", fooCreated.ResourceVersion, storage.Everything) + if err
!= nil { + t.Fatalf("Unexpected error: %v", err) + } + defer initialWatcher.Stop() + + verifyWatchEvent(t, initialWatcher, watch.Modified, podFooPrime) + + // Now test watch from "now". + nowWatcher, err := cacher.Watch(context.TODO(), "pods/ns/foo", "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer nowWatcher.Stop() + + verifyWatchEvent(t, nowWatcher, watch.Added, podFooPrime) + + _ = updatePod(t, etcdStorage, podFooBis, fooUpdated) + + verifyWatchEvent(t, nowWatcher, watch.Modified, podFooBis) +} + +func TestWatcherTimeout(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 10) + defer cacher.Stop() + + // initialVersion is used to initiate the watcher at the beginning of the world, + // which is not defined precisely in etcd. + initialVersion, err := cacher.LastSyncResourceVersion() + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + startVersion := strconv.Itoa(int(initialVersion)) + + // Create a number of watchers that will not be reading any result. + nonReadingWatchers := 50 + for i := 0; i < nonReadingWatchers; i++ { + watcher, err := cacher.WatchList(context.TODO(), "pods/ns", startVersion, storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watcher.Stop() + } + + // Create a second watcher that will be reading result.
+ readingWatcher, err := cacher.WatchList(context.TODO(), "pods/ns", startVersion, storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer readingWatcher.Stop() + + startTime := time.Now() + for i := 1; i <= 22; i++ { + pod := makeTestPod(strconv.Itoa(i)) + _ = updatePod(t, etcdStorage, pod, nil) + verifyWatchEvent(t, readingWatcher, watch.Added, pod) + } + if time.Since(startTime) > time.Duration(250*nonReadingWatchers)*time.Millisecond { + t.Errorf("waiting for events took too long: %v", time.Since(startTime)) + } +} + +func TestFiltering(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 10) + defer cacher.Stop() + + // Ensure that the cacher is initialized, before creating any pods, + // so that we are sure that all events will be present in cacher. + syncWatcher, err := cacher.Watch(context.TODO(), "pods/ns/foo", "0", storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + syncWatcher.Stop() + + podFoo := makeTestPod("foo") + podFoo.Labels = map[string]string{"filter": "foo"} + podFooFiltered := makeTestPod("foo") + podFooPrime := makeTestPod("foo") + podFooPrime.Labels = map[string]string{"filter": "foo"} + podFooPrime.Spec.NodeName = "fakeNode" + + podFooNS2 := makeTestPod("foo") + podFooNS2.Namespace += "2" + podFooNS2.Labels = map[string]string{"filter": "foo"} + + // Create in another namespace first to make sure events from other namespaces don't get delivered + updatePod(t, etcdStorage, podFooNS2, nil) + + fooCreated := updatePod(t, etcdStorage, podFoo, nil) + fooFiltered := updatePod(t, etcdStorage, podFooFiltered, fooCreated) + fooUnfiltered := updatePod(t, etcdStorage, podFoo, fooFiltered) + _ = updatePod(t, etcdStorage, podFooPrime, fooUnfiltered) + + deleted := example.Pod{} + if err := etcdStorage.Delete(context.TODO(), "pods/ns/foo", &deleted, nil); err != nil { + t.Errorf("Unexpected 
error: %v", err) + } + + // Set up Watch for object "podFoo" with label filter set. + pred := storage.SelectionPredicate{ + Label: labels.SelectorFromSet(labels.Set{"filter": "foo"}), + Field: fields.Everything(), + GetAttrs: func(obj runtime.Object) (label labels.Set, field fields.Set, err error) { + metadata, err := meta.Accessor(obj) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + return labels.Set(metadata.GetLabels()), nil, nil + }, + } + watcher, err := cacher.Watch(context.TODO(), "pods/ns/foo", fooCreated.ResourceVersion, pred) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watcher.Stop() + + verifyWatchEvent(t, watcher, watch.Deleted, podFooFiltered) + verifyWatchEvent(t, watcher, watch.Added, podFoo) + verifyWatchEvent(t, watcher, watch.Modified, podFooPrime) + verifyWatchEvent(t, watcher, watch.Deleted, podFooPrime) +} + +func TestStartingResourceVersion(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 10) + defer cacher.Stop() + + // add 1 object + podFoo := makeTestPod("foo") + fooCreated := updatePod(t, etcdStorage, podFoo, nil) + + // Set up Watch starting at fooCreated.ResourceVersion + 10 + rv, err := storage.ParseWatchResourceVersion(fooCreated.ResourceVersion) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + rv += 10 + startVersion := strconv.Itoa(int(rv)) + + watcher, err := cacher.Watch(context.TODO(), "pods/ns/foo", startVersion, storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer watcher.Stop() + + lastFoo := fooCreated + for i := 0; i < 11; i++ { + podFooForUpdate := makeTestPod("foo") + podFooForUpdate.Labels = map[string]string{"foo": strconv.Itoa(i)} + lastFoo = updatePod(t, etcdStorage, podFooForUpdate, lastFoo) + } + + select { + case e := <-watcher.ResultChan(): + pod := e.Object.(*example.Pod) + podRV, err := 
storage.ParseWatchResourceVersion(pod.ResourceVersion) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // event should have at least rv + 1, since we're starting the watch at rv + if podRV <= rv { + t.Errorf("expected event with resourceVersion of at least %d, got %d", rv+1, podRV) + } + case <-time.After(wait.ForeverTestTimeout): + t.Errorf("timed out waiting for event") + } +} + +func TestRandomWatchDeliver(t *testing.T) { + server, etcdStorage := newEtcdTestStorage(t, etcdtest.PathPrefix()) + defer server.Terminate(t) + cacher := newTestCacher(etcdStorage, 10) + defer cacher.Stop() + + fooCreated := updatePod(t, etcdStorage, makeTestPod("foo"), nil) + rv, err := storage.ParseWatchResourceVersion(fooCreated.ResourceVersion) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + startVersion := strconv.Itoa(int(rv)) + + watcher, err := cacher.WatchList(context.TODO(), "pods/ns", startVersion, storage.Everything) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // Now we can create exactly 21 events that should be delivered + // to the watcher, before it will completely block cacher and as + // a result will be dropped. + for i := 0; i < 21; i++ { + updatePod(t, etcdStorage, makeTestPod(fmt.Sprintf("foo-%d", i)), nil) + } + + // Now stop the watcher and check if the consecutive events are being delivered. + watcher.Stop() + + watched := 0 + for { + event, ok := <-watcher.ResultChan() + if !ok { + break + } + if a, e := event.Object.(*example.Pod).Name, fmt.Sprintf("foo-%d", watched); e != a { + t.Errorf("Unexpected object watched: %s, expected %s", a, e) + } + watched++ + } +} diff --git a/pkg/storage/tests/utils.go b/pkg/storage/tests/utils.go new file mode 100644 index 000000000..f1a5d95b8 --- /dev/null +++ b/pkg/storage/tests/utils.go @@ -0,0 +1,32 @@ +/* +Copyright 2017 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tests + +import ( + "k8s.io/apiserver/pkg/apis/example" + + _ "k8s.io/client-go/pkg/api/install" +) + +func DeepEqualSafePodSpec() example.PodSpec { + grace := int64(30) + return example.PodSpec{ + RestartPolicy: "Always", + TerminationGracePeriodSeconds: &grace, + SchedulerName: "default-scheduler", + } +}