From ebcf785f2fd81f52f06a0ea83baa4be698605876 Mon Sep 17 00:00:00 2001 From: Alessandro Boch Date: Sun, 8 May 2016 00:31:30 -0700 Subject: [PATCH 1/3] Update libnetwork dependencies for b66c038 Signed-off-by: Alessandro Boch --- hack/vendor.sh | 11 +- .../github.com/Sirupsen/logrus/CHANGELOG.md | 7 + .../src/github.com/Sirupsen/logrus/README.md | 30 +- .../src/github.com/Sirupsen/logrus/entry.go | 2 +- .../src/github.com/Sirupsen/logrus/logrus.go | 47 +- .../src/github.com/armon/go-radix/.gitignore | 22 + .../src/github.com/armon/go-radix/.travis.yml | 3 + vendor/src/github.com/armon/go-radix/LICENSE | 20 + .../src/github.com/armon/go-radix/README.md | 36 ++ vendor/src/github.com/armon/go-radix/radix.go | 467 ++++++++++++++++++ .../github.com/docker/go-events/.gitignore | 24 + .../src/github.com/docker/go-events/LICENSE | 201 ++++++++ .../src/github.com/docker/go-events/README.md | 112 +++++ .../github.com/docker/go-events/broadcast.go | 158 ++++++ .../github.com/docker/go-events/channel.go | 47 ++ .../src/github.com/docker/go-events/errors.go | 10 + .../src/github.com/docker/go-events/event.go | 15 + .../src/github.com/docker/go-events/filter.go | 52 ++ .../src/github.com/docker/go-events/queue.go | 104 ++++ .../src/github.com/docker/go-events/retry.go | 168 +++++++ .../src/github.com/docker/libkv/.travis.yml | 13 +- .../src/github.com/docker/libkv/LICENSE.code | 2 +- .../src/github.com/docker/libkv/MAINTAINERS | 46 ++ vendor/src/github.com/docker/libkv/README.md | 9 +- vendor/src/github.com/docker/libkv/libkv.go | 2 +- .../docker/libkv/store/boltdb/boltdb.go | 18 +- .../docker/libkv/store/consul/consul.go | 115 ++++- .../docker/libkv/store/etcd/etcd.go | 19 +- .../github.com/docker/libkv/store/store.go | 4 + .../docker/libkv/store/zookeeper/zookeeper.go | 16 +- .../hashicorp/go-multierror/LICENSE | 353 +++++++++++++ .../hashicorp/go-multierror/README.md | 91 ++++ .../hashicorp/go-multierror/append.go | 30 ++ .../hashicorp/go-multierror/format.go | 23 + .../hashicorp/go-multierror/multierror.go | 51 ++ .../github.com/hashicorp/memberlist/README.md | 33 +- .../hashicorp/memberlist/alive_delegate.go | 14 + .../github.com/hashicorp/memberlist/config.go | 31 +- .../hashicorp/memberlist/delegate.go | 3 +- .../hashicorp/memberlist/keyring.go | 11 +- .../hashicorp/memberlist/logging.go | 22 + .../hashicorp/memberlist/memberlist.go | 251 +++++++--- .../hashicorp/memberlist/merge_delegate.go | 5 +- .../github.com/hashicorp/memberlist/net.go | 451 ++++++++++++----- .../hashicorp/memberlist/ping_delegate.go | 14 + .../github.com/hashicorp/memberlist/state.go | 232 ++++++--- .../github.com/hashicorp/memberlist/util.go | 83 +++- .../hashicorp/serf/coordinate/client.go | 180 +++++++ .../hashicorp/serf/coordinate/config.go | 70 +++ .../hashicorp/serf/coordinate/coordinate.go | 183 +++++++ .../hashicorp/serf/coordinate/phantom.go | 187 +++++++ .../github.com/hashicorp/serf/serf/config.go | 17 + .../hashicorp/serf/serf/delegate.go | 7 + .../github.com/hashicorp/serf/serf/event.go | 4 +- .../hashicorp/serf/serf/merge_delegate.go | 39 +- .../hashicorp/serf/serf/ping_delegate.go | 89 ++++ .../github.com/hashicorp/serf/serf/serf.go | 112 ++++- .../hashicorp/serf/serf/snapshot.go | 62 ++- .../website/{LICENSE.md => source/LICENSE} | 0 59 files changed, 4049 insertions(+), 379 deletions(-) create mode 100644 vendor/src/github.com/armon/go-radix/.gitignore create mode 100644 vendor/src/github.com/armon/go-radix/.travis.yml create mode 100644 vendor/src/github.com/armon/go-radix/LICENSE create mode 100644 
vendor/src/github.com/armon/go-radix/README.md create mode 100644 vendor/src/github.com/armon/go-radix/radix.go create mode 100644 vendor/src/github.com/docker/go-events/.gitignore create mode 100644 vendor/src/github.com/docker/go-events/LICENSE create mode 100644 vendor/src/github.com/docker/go-events/README.md create mode 100644 vendor/src/github.com/docker/go-events/broadcast.go create mode 100644 vendor/src/github.com/docker/go-events/channel.go create mode 100644 vendor/src/github.com/docker/go-events/errors.go create mode 100644 vendor/src/github.com/docker/go-events/event.go create mode 100644 vendor/src/github.com/docker/go-events/filter.go create mode 100644 vendor/src/github.com/docker/go-events/queue.go create mode 100644 vendor/src/github.com/docker/go-events/retry.go create mode 100644 vendor/src/github.com/docker/libkv/MAINTAINERS create mode 100644 vendor/src/github.com/hashicorp/go-multierror/LICENSE create mode 100644 vendor/src/github.com/hashicorp/go-multierror/README.md create mode 100644 vendor/src/github.com/hashicorp/go-multierror/append.go create mode 100644 vendor/src/github.com/hashicorp/go-multierror/format.go create mode 100644 vendor/src/github.com/hashicorp/go-multierror/multierror.go create mode 100644 vendor/src/github.com/hashicorp/memberlist/alive_delegate.go create mode 100644 vendor/src/github.com/hashicorp/memberlist/logging.go create mode 100644 vendor/src/github.com/hashicorp/memberlist/ping_delegate.go create mode 100644 vendor/src/github.com/hashicorp/serf/coordinate/client.go create mode 100644 vendor/src/github.com/hashicorp/serf/coordinate/config.go create mode 100644 vendor/src/github.com/hashicorp/serf/coordinate/coordinate.go create mode 100644 vendor/src/github.com/hashicorp/serf/coordinate/phantom.go create mode 100644 vendor/src/github.com/hashicorp/serf/serf/ping_delegate.go rename vendor/src/github.com/hashicorp/serf/website/{LICENSE.md => source/LICENSE} (100%) diff --git a/hack/vendor.sh b/hack/vendor.sh index 0fe174c105..3cf528c2f7 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -9,7 +9,7 @@ source 'hack/.vendor-helpers.sh' clone git github.com/Azure/go-ansiterm 388960b655244e76e24c75f48631564eaefade62 clone git github.com/Microsoft/hcsshim v0.2.2 clone git github.com/Microsoft/go-winio v0.3.4 -clone git github.com/Sirupsen/logrus v0.9.0 # logrus is a common dependency among multiple deps +clone git github.com/Sirupsen/logrus v0.10.0 # logrus is a common dependency among multiple deps clone git github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a clone git github.com/go-check/check 03a4d9dcf2f92eae8e90ed42aa2656f63fdd0b14 https://github.com/cpuguy83/check.git clone git github.com/gorilla/context 14f550f51a @@ -30,11 +30,14 @@ clone git github.com/imdario/mergo 0.2.1 #get libnetwork packages clone git github.com/docker/libnetwork v0.8.0-dev.1 +clone git github.com/docker/go-events 2e7d352816128aa84f4d29b2a21d400133701a0d +clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b -clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4 -clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2 -clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410 +clone git github.com/hashicorp/memberlist 88ac4de0d1a0ca6def284b571342db3b777a4c37 +clone git github.com/hashicorp/go-multierror 
fcdddc395df1ddf4247c69bd436e84cfa0733f7e +clone git github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870 +clone git github.com/docker/libkv 7283ef27ed32fe267388510a91709b307bb9942c clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 clone git github.com/vishvananda/netlink 631962935bff4f3d20ff32a72e8944f6d2836a26 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060 diff --git a/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md b/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md index 9e9e6009a0..f2c2bc2111 100644 --- a/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md +++ b/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md @@ -1,3 +1,10 @@ +# 0.10.0 + +* feature: Add a test hook (#180) +* feature: `ParseLevel` is now case-insensitive (#326) +* feature: `FieldLogger` interface that generalizes `Logger` and `Entry` (#308) +* performance: avoid re-allocations on `WithFields` (#335) + # 0.9.0 * logrus/text_formatter: don't emit empty msg diff --git a/vendor/src/github.com/Sirupsen/logrus/README.md b/vendor/src/github.com/Sirupsen/logrus/README.md index f8720c9a74..6e1721a743 100644 --- a/vendor/src/github.com/Sirupsen/logrus/README.md +++ b/vendor/src/github.com/Sirupsen/logrus/README.md @@ -1,4 +1,4 @@ -# Logrus :walrus: [![Build Status](https://travis-ci.org/Sirupsen/logrus.svg?branch=master)](https://travis-ci.org/Sirupsen/logrus) [![godoc reference](https://godoc.org/github.com/Sirupsen/logrus?status.png)][godoc] +# Logrus :walrus: [![Build Status](https://travis-ci.org/Sirupsen/logrus.svg?branch=master)](https://travis-ci.org/Sirupsen/logrus) [![GoDoc](https://godoc.org/github.com/Sirupsen/logrus?status.svg)](https://godoc.org/github.com/Sirupsen/logrus) Logrus is a structured logger for Go (golang), completely API compatible with the standard library logger. [Godoc][godoc]. **Please note the Logrus API is not @@ -12,7 +12,7 @@ plain text): ![Colored](http://i.imgur.com/PY7qMwd.png) -With `log.Formatter = new(logrus.JSONFormatter)`, for easy parsing by logstash +With `log.SetFormatter(&log.JSONFormatter{})`, for easy parsing by logstash or Splunk: ```json @@ -32,7 +32,7 @@ ocean","size":10,"time":"2014-03-10 19:57:38.562264131 -0400 EDT"} "time":"2014-03-10 19:57:38.562543128 -0400 EDT"} ``` -With the default `log.Formatter = new(&log.TextFormatter{})` when a TTY is not +With the default `log.SetFormatter(&log.TextFormatter{})` when a TTY is not attached, the output is compatible with the [logfmt](http://godoc.org/github.com/kr/logfmt) format: @@ -222,6 +222,11 @@ Note: Syslog hook also support connecting to local syslog (Ex. "/dev/log" or "/v | [Octokit](https://github.com/dorajistyle/logrus-octokit-hook) | Hook for logging to github via octokit | | [DeferPanic](https://github.com/deferpanic/dp-logrus) | Hook for logging to DeferPanic | | [Redis-Hook](https://github.com/rogierlommers/logrus-redis-hook) | Hook for logging to a ELK stack (through Redis) | +| [Amqp-Hook](https://github.com/vladoatanasov/logrus_amqp) | Hook for logging to Amqp broker (Like RabbitMQ) | +| [KafkaLogrus](https://github.com/goibibo/KafkaLogrus) | Hook for logging to kafka | +| [Typetalk](https://github.com/dragon3/logrus-typetalk-hook) | Hook for logging to [Typetalk](https://www.typetalk.in/) | +| [ElasticSearch](https://github.com/sohlich/elogrus) | Hook for logging to ElasticSearch| + #### Level logging @@ -363,4 +368,21 @@ entries. It should not be a feature of the application-level logger. 
| ---- | ----------- | |[Logrus Mate](https://github.com/gogap/logrus_mate)|Logrus mate is a tool for Logrus to manage loggers, you can initial logger's level, hook and formatter by config file, the logger will generated with different config at different environment.| -[godoc]: https://godoc.org/github.com/Sirupsen/logrus +#### Testing + +Logrus has a built in facility for asserting the presence of log messages. This is implemented through the `test` hook and provides: + +* decorators for existing logger (`test.NewLocal` and `test.NewGlobal`) which basically just add the `test` hook +* a test logger (`test.NewNullLogger`) that just records log messages (and does not output any): + +```go +logger, hook := NewNullLogger() +logger.Error("Hello error") + +assert.Equal(1, len(hook.Entries)) +assert.Equal(logrus.ErrorLevel, hook.LastEntry().Level) +assert.Equal("Hello error", hook.LastEntry().Message) + +hook.Reset() +assert.Nil(hook.LastEntry()) +``` diff --git a/vendor/src/github.com/Sirupsen/logrus/entry.go b/vendor/src/github.com/Sirupsen/logrus/entry.go index 9ae900bc5e..89e966e7bf 100644 --- a/vendor/src/github.com/Sirupsen/logrus/entry.go +++ b/vendor/src/github.com/Sirupsen/logrus/entry.go @@ -68,7 +68,7 @@ func (entry *Entry) WithField(key string, value interface{}) *Entry { // Add a map of fields to the Entry. func (entry *Entry) WithFields(fields Fields) *Entry { - data := Fields{} + data := make(Fields, len(entry.Data)+len(fields)) for k, v := range entry.Data { data[k] = v } diff --git a/vendor/src/github.com/Sirupsen/logrus/logrus.go b/vendor/src/github.com/Sirupsen/logrus/logrus.go index 0c09fbc264..e596691116 100644 --- a/vendor/src/github.com/Sirupsen/logrus/logrus.go +++ b/vendor/src/github.com/Sirupsen/logrus/logrus.go @@ -3,6 +3,7 @@ package logrus import ( "fmt" "log" + "strings" ) // Fields type, used to pass to `WithFields`. @@ -33,7 +34,7 @@ func (level Level) String() string { // ParseLevel takes a string level and returns the Logrus log level constant. func ParseLevel(lvl string) (Level, error) { - switch lvl { + switch strings.ToLower(lvl) { case "panic": return PanicLevel, nil case "fatal": @@ -52,6 +53,16 @@ func ParseLevel(lvl string) (Level, error) { return l, fmt.Errorf("not a valid logrus Level: %q", lvl) } +// A constant exposing all logging levels +var AllLevels = []Level{ + PanicLevel, + FatalLevel, + ErrorLevel, + WarnLevel, + InfoLevel, + DebugLevel, +} + // These are the different logging levels. You can set the logging level to log // on your instance of logger, obtained with `logrus.New()`. 
const ( @@ -96,3 +107,37 @@ type StdLogger interface { Panicf(string, ...interface{}) Panicln(...interface{}) } + +// The FieldLogger interface generalizes the Entry and Logger types +type FieldLogger interface { + WithField(key string, value interface{}) *Entry + WithFields(fields Fields) *Entry + WithError(err error) *Entry + + Debugf(format string, args ...interface{}) + Infof(format string, args ...interface{}) + Printf(format string, args ...interface{}) + Warnf(format string, args ...interface{}) + Warningf(format string, args ...interface{}) + Errorf(format string, args ...interface{}) + Fatalf(format string, args ...interface{}) + Panicf(format string, args ...interface{}) + + Debug(args ...interface{}) + Info(args ...interface{}) + Print(args ...interface{}) + Warn(args ...interface{}) + Warning(args ...interface{}) + Error(args ...interface{}) + Fatal(args ...interface{}) + Panic(args ...interface{}) + + Debugln(args ...interface{}) + Infoln(args ...interface{}) + Println(args ...interface{}) + Warnln(args ...interface{}) + Warningln(args ...interface{}) + Errorln(args ...interface{}) + Fatalln(args ...interface{}) + Panicln(args ...interface{}) +} diff --git a/vendor/src/github.com/armon/go-radix/.gitignore b/vendor/src/github.com/armon/go-radix/.gitignore new file mode 100644 index 0000000000..00268614f0 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/.gitignore @@ -0,0 +1,22 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe diff --git a/vendor/src/github.com/armon/go-radix/.travis.yml b/vendor/src/github.com/armon/go-radix/.travis.yml new file mode 100644 index 0000000000..1a0bbea6c7 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/.travis.yml @@ -0,0 +1,3 @@ +language: go +go: + - tip diff --git a/vendor/src/github.com/armon/go-radix/LICENSE b/vendor/src/github.com/armon/go-radix/LICENSE new file mode 100644 index 0000000000..a5df10e675 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2014 Armon Dadgar + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/vendor/src/github.com/armon/go-radix/README.md b/vendor/src/github.com/armon/go-radix/README.md new file mode 100644 index 0000000000..c054fe86c0 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/README.md @@ -0,0 +1,36 @@ +go-radix [![Build Status](https://travis-ci.org/armon/go-radix.png)](https://travis-ci.org/armon/go-radix) +========= + +Provides the `radix` package that implements a [radix tree](http://en.wikipedia.org/wiki/Radix_tree). +The package only provides a single `Tree` implementation, optimized for sparse nodes. + +As a radix tree, it provides the following: + * O(k) operations. In many cases, this can be faster than a hash table since + the hash function is an O(k) operation, and hash tables have very poor cache locality. + * Minimum / Maximum value lookups + * Ordered iteration + +Documentation +============= + +The full documentation is available on [Godoc](http://godoc.org/github.com/armon/go-radix). + +Example +======= + +Below is a simple example of usage + +```go +// Create a tree +r := radix.New() +r.Insert("foo", 1) +r.Insert("bar", 2) +r.Insert("foobar", 2) + +// Find the longest prefix match +m, _, _ := r.LongestPrefix("foozip") +if m != "foo" { + panic("should be foo") +} +``` + diff --git a/vendor/src/github.com/armon/go-radix/radix.go b/vendor/src/github.com/armon/go-radix/radix.go new file mode 100644 index 0000000000..8c963c914a --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/radix.go @@ -0,0 +1,467 @@ +package radix + +import ( + "sort" + "strings" +) + +// WalkFn is used when walking the tree. Takes a +// key and value, returning if iteration should +// be terminated. +type WalkFn func(s string, v interface{}) bool + +// leafNode is used to represent a value +type leafNode struct { + key string + val interface{} +} + +// edge is used to represent an edge node +type edge struct { + label byte + node *node +} + +type node struct { + // leaf is used to store possible leaf + leaf *leafNode + + // prefix is the common prefix we ignore + prefix string + + // Edges should be stored in-order for iteration. + // We avoid a fully materialized slice to save memory, + // since in most cases we expect to be sparse + edges edges +} + +func (n *node) isLeaf() bool { + return n.leaf != nil +} + +func (n *node) addEdge(e edge) { + n.edges = append(n.edges, e) + n.edges.Sort() +} + +func (n *node) replaceEdge(e edge) { + num := len(n.edges) + idx := sort.Search(num, func(i int) bool { + return n.edges[i].label >= e.label + }) + if idx < num && n.edges[idx].label == e.label { + n.edges[idx].node = e.node + return + } + panic("replacing missing edge") +} + +func (n *node) getEdge(label byte) *node { + num := len(n.edges) + idx := sort.Search(num, func(i int) bool { + return n.edges[i].label >= label + }) + if idx < num && n.edges[idx].label == label { + return n.edges[idx].node + } + return nil +} + +type edges []edge + +func (e edges) Len() int { + return len(e) +} + +func (e edges) Less(i, j int) bool { + return e[i].label < e[j].label +} + +func (e edges) Swap(i, j int) { + e[i], e[j] = e[j], e[i] +} + +func (e edges) Sort() { + sort.Sort(e) +} + +// Tree implements a radix tree. This can be treated as a +// Dictionary abstract data type. 
The main advantage over +// a standard hash map is prefix-based lookups and +// ordered iteration, +type Tree struct { + root *node + size int +} + +// New returns an empty Tree +func New() *Tree { + return NewFromMap(nil) +} + +// NewFromMap returns a new tree containing the keys +// from an existing map +func NewFromMap(m map[string]interface{}) *Tree { + t := &Tree{root: &node{}} + for k, v := range m { + t.Insert(k, v) + } + return t +} + +// Len is used to return the number of elements in the tree +func (t *Tree) Len() int { + return t.size +} + +// longestPrefix finds the length of the shared prefix +// of two strings +func longestPrefix(k1, k2 string) int { + max := len(k1) + if l := len(k2); l < max { + max = l + } + var i int + for i = 0; i < max; i++ { + if k1[i] != k2[i] { + break + } + } + return i +} + +// Insert is used to add a newentry or update +// an existing entry. Returns if updated. +func (t *Tree) Insert(s string, v interface{}) (interface{}, bool) { + var parent *node + n := t.root + search := s + for { + // Handle key exhaution + if len(search) == 0 { + if n.isLeaf() { + old := n.leaf.val + n.leaf.val = v + return old, true + } else { + n.leaf = &leafNode{ + key: s, + val: v, + } + t.size++ + return nil, false + } + } + + // Look for the edge + parent = n + n = n.getEdge(search[0]) + + // No edge, create one + if n == nil { + e := edge{ + label: search[0], + node: &node{ + leaf: &leafNode{ + key: s, + val: v, + }, + prefix: search, + }, + } + parent.addEdge(e) + t.size++ + return nil, false + } + + // Determine longest prefix of the search key on match + commonPrefix := longestPrefix(search, n.prefix) + if commonPrefix == len(n.prefix) { + search = search[commonPrefix:] + continue + } + + // Split the node + t.size++ + child := &node{ + prefix: search[:commonPrefix], + } + parent.replaceEdge(edge{ + label: search[0], + node: child, + }) + + // Restore the existing node + child.addEdge(edge{ + label: n.prefix[commonPrefix], + node: n, + }) + n.prefix = n.prefix[commonPrefix:] + + // Create a new leaf node + leaf := &leafNode{ + key: s, + val: v, + } + + // If the new key is a subset, add to to this node + search = search[commonPrefix:] + if len(search) == 0 { + child.leaf = leaf + return nil, false + } + + // Create a new edge for the node + child.addEdge(edge{ + label: search[0], + node: &node{ + leaf: leaf, + prefix: search, + }, + }) + return nil, false + } + return nil, false +} + +// Delete is used to delete a key, returning the previous +// value and if it was deleted +func (t *Tree) Delete(s string) (interface{}, bool) { + n := t.root + search := s + for { + // Check for key exhaution + if len(search) == 0 { + if !n.isLeaf() { + break + } + goto DELETE + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } + return nil, false + +DELETE: + // Delete the leaf + leaf := n.leaf + n.leaf = nil + t.size-- + + // Check if we should merge this node + if len(n.edges) == 1 { + e := n.edges[0] + child := e.node + n.prefix = n.prefix + child.prefix + n.leaf = child.leaf + n.edges = child.edges + } + return leaf.val, true +} + +// Get is used to lookup a specific key, returning +// the value and if it was found +func (t *Tree) Get(s string) (interface{}, bool) { + n := t.root + search := s + for { + // Check for key exhaution + if len(search) == 0 { + if n.isLeaf() { + return n.leaf.val, true + } + break + } + 
+ // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } + return nil, false +} + +// LongestPrefix is like Get, but instead of an +// exact match, it will return the longest prefix match. +func (t *Tree) LongestPrefix(s string) (string, interface{}, bool) { + var last *leafNode + n := t.root + search := s + for { + // Look for a leaf node + if n.isLeaf() { + last = n.leaf + } + + // Check for key exhaution + if len(search) == 0 { + break + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } + if last != nil { + return last.key, last.val, true + } + return "", nil, false +} + +// Minimum is used to return the minimum value in the tree +func (t *Tree) Minimum() (string, interface{}, bool) { + n := t.root + for { + if n.isLeaf() { + return n.leaf.key, n.leaf.val, true + } + if len(n.edges) > 0 { + n = n.edges[0].node + } else { + break + } + } + return "", nil, false +} + +// Maximum is used to return the maximum value in the tree +func (t *Tree) Maximum() (string, interface{}, bool) { + n := t.root + for { + if num := len(n.edges); num > 0 { + n = n.edges[num-1].node + continue + } + if n.isLeaf() { + return n.leaf.key, n.leaf.val, true + } else { + break + } + } + return "", nil, false +} + +// Walk is used to walk the tree +func (t *Tree) Walk(fn WalkFn) { + recursiveWalk(t.root, fn) +} + +// WalkPrefix is used to walk the tree under a prefix +func (t *Tree) WalkPrefix(prefix string, fn WalkFn) { + n := t.root + search := prefix + for { + // Check for key exhaution + if len(search) == 0 { + recursiveWalk(n, fn) + return + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + + } else if strings.HasPrefix(n.prefix, search) { + // Child may be under our search prefix + recursiveWalk(n, fn) + return + } else { + break + } + } + +} + +// WalkPath is used to walk the tree, but only visiting nodes +// from the root down to a given leaf. Where WalkPrefix walks +// all the entries *under* the given prefix, this walks the +// entries *above* the given prefix. +func (t *Tree) WalkPath(path string, fn WalkFn) { + n := t.root + search := path + for { + // Visit the leaf values if any + if n.leaf != nil && fn(n.leaf.key, n.leaf.val) { + return + } + + // Check for key exhaution + if len(search) == 0 { + return + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + return + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } +} + +// recursiveWalk is used to do a pre-order walk of a node +// recursively. 
Returns true if the walk should be aborted +func recursiveWalk(n *node, fn WalkFn) bool { + // Visit the leaf values if any + if n.leaf != nil && fn(n.leaf.key, n.leaf.val) { + return true + } + + // Recurse on the children + for _, e := range n.edges { + if recursiveWalk(e.node, fn) { + return true + } + } + return false +} + +// ToMap is used to walk the tree and convert it into a map +func (t *Tree) ToMap() map[string]interface{} { + out := make(map[string]interface{}, t.size) + t.Walk(func(k string, v interface{}) bool { + out[k] = v + return false + }) + return out +} diff --git a/vendor/src/github.com/docker/go-events/.gitignore b/vendor/src/github.com/docker/go-events/.gitignore new file mode 100644 index 0000000000..daf913b1b3 --- /dev/null +++ b/vendor/src/github.com/docker/go-events/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/src/github.com/docker/go-events/LICENSE b/vendor/src/github.com/docker/go-events/LICENSE new file mode 100644 index 0000000000..8dada3edaf --- /dev/null +++ b/vendor/src/github.com/docker/go-events/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/src/github.com/docker/go-events/README.md b/vendor/src/github.com/docker/go-events/README.md new file mode 100644 index 0000000000..9e3625da9e --- /dev/null +++ b/vendor/src/github.com/docker/go-events/README.md @@ -0,0 +1,112 @@ +# Docker Events Package + +[![GoDoc](https://godoc.org/github.com/docker/go-events?status.svg)](https://godoc.org/github.com/docker/go-events) +[![Circle CI](https://circleci.com/gh/docker/go-events.svg?style=shield)](https://circleci.com/gh/docker/go-events) + +The Docker `events` package implements a composable event distribution package +for Go. + +Originally created to implement the [notifications in Docker Registry +2](https://github.com/docker/distribution/blob/master/docs/notifications.md), +we've found the pattern to be useful in other applications. This package is +most of the same code with slightly updated interfaces. Much of the internals +have been made available. + +## Usage + +The `events` package centers around a `Sink` type. 
Events are written with +calls to `Sink.Write(event Event)`. Sinks can be wired up in various +configurations to achieve interesting behavior. + +The canonical example is that employed by the +[docker/distribution/notifications](https://godoc.org/github.com/docker/distribution/notifications) +package. Let's say we have a type `httpSink` where we'd like to queue +notifications. As a rule, it should send a single http request and return an +error if it fails: + +```go +func (h *httpSink) Write(event Event) error { + p, err := json.Marshal(event) + if err != nil { + return err + } + body := bytes.NewReader(p) + resp, err := h.client.Post(h.url, "application/json", body) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.Status != 200 { + return errors.New("unexpected status") + } + + return nil +} + +// implement (*httpSink).Close() +``` + +With just that, we can start using components from this package. One can call +`(*httpSink).Write` to send events as the body of a post request to a +configured URL. + +### Retries + +HTTP can be unreliable. The first feature we'd like is to have some retry: + +```go +hs := newHTTPSink(/*...*/) +retry := NewRetryingSink(hs, NewBreaker(5, time.Second)) +``` + +We now have a sink that will retry events against the `httpSink` until they +succeed. The retry will backoff for one second after 5 consecutive failures +using the breaker strategy. + +### Queues + +This isn't quite enough. We we want a sink that doesn't block while we are +waiting for events to be sent. Let's add a `Queue`: + +```go +queue := NewQueue(retry) +``` + +Now, we have an unbounded queue that will work through all events sent with +`(*Queue).Write`. Events can be added asynchronously to the queue without +blocking the current execution path. This is ideal for use in an http request. + +### Broadcast + +It usually turns out that you want to send to more than one listener. We can +use `Broadcaster` to support this: + +```go +var broadcast = NewBroadcaster() // make it available somewhere in your application. +broadcast.Add(queue) // add your queue! +broadcast.Add(queue2) // and another! +``` + +With the above, we can now call `broadcast.Write` in our http handlers and have +all the events distributed to each queue. Because the events are queued, not +listener blocks another. + +### Extending + +For the most part, the above is sufficient for a lot of applications. However, +extending the above functionality can be done implementing your own `Sink`. The +behavior and semantics of the sink can be completely dependent on the +application requirements. The interface is provided below for reference: + +```go +type Sink { + Write(Event) error + Close() error +} +``` + +Application behavior can be controlled by how `Write` behaves. The examples +above are designed to queue the message and return as quickly as possible. +Other implementations may block until the event is committed to durable +storage. diff --git a/vendor/src/github.com/docker/go-events/broadcast.go b/vendor/src/github.com/docker/go-events/broadcast.go new file mode 100644 index 0000000000..e73d758bcf --- /dev/null +++ b/vendor/src/github.com/docker/go-events/broadcast.go @@ -0,0 +1,158 @@ +package events + +import "github.com/Sirupsen/logrus" + +// Broadcaster sends events to multiple, reliable Sinks. The goal of this +// component is to dispatch events to configured endpoints. Reliability can be +// provided by wrapping incoming sinks. 
+type Broadcaster struct { + sinks []Sink + events chan Event + adds chan configureRequest + removes chan configureRequest + closed chan chan struct{} +} + +// NewBroadcaster appends one or more sinks to the list of sinks. The +// broadcaster behavior will be affected by the properties of the sink. +// Generally, the sink should accept all messages and deal with reliability on +// its own. Use of EventQueue and RetryingSink should be used here. +func NewBroadcaster(sinks ...Sink) *Broadcaster { + b := Broadcaster{ + sinks: sinks, + events: make(chan Event), + adds: make(chan configureRequest), + removes: make(chan configureRequest), + closed: make(chan chan struct{}), + } + + // Start the broadcaster + go b.run() + + return &b +} + +// Write accepts an event to be dispatched to all sinks. This method will never +// fail and should never block (hopefully!). The caller cedes the memory to the +// broadcaster and should not modify it after calling write. +func (b *Broadcaster) Write(event Event) error { + select { + case b.events <- event: + case <-b.closed: + return ErrSinkClosed + } + return nil +} + +// Add the sink to the broadcaster. +// +// The provided sink must be comparable with equality. Typically, this just +// works with a regular pointer type. +func (b *Broadcaster) Add(sink Sink) error { + return b.configure(b.adds, sink) +} + +// Remove the provided sink. +func (b *Broadcaster) Remove(sink Sink) error { + return b.configure(b.removes, sink) +} + +type configureRequest struct { + sink Sink + response chan error +} + +func (b *Broadcaster) configure(ch chan configureRequest, sink Sink) error { + response := make(chan error, 1) + + for { + select { + case ch <- configureRequest{ + sink: sink, + response: response}: + ch = nil + case err := <-response: + return err + case <-b.closed: + return ErrSinkClosed + } + } +} + +// Close the broadcaster, ensuring that all messages are flushed to the +// underlying sink before returning. +func (b *Broadcaster) Close() error { + select { + case <-b.closed: + // already closed + return ErrSinkClosed + default: + // do a little chan handoff dance to synchronize closing + closed := make(chan struct{}) + b.closed <- closed + close(b.closed) + <-closed + return nil + } +} + +// run is the main broadcast loop, started when the broadcaster is created. +// Under normal conditions, it waits for events on the event channel. After +// Close is called, this goroutine will exit. +func (b *Broadcaster) run() { + remove := func(target Sink) { + for i, sink := range b.sinks { + if sink == target { + b.sinks = append(b.sinks[:i], b.sinks[i+1:]...) + break + } + } + } + + for { + select { + case event := <-b.events: + for _, sink := range b.sinks { + if err := sink.Write(event); err != nil { + if err == ErrSinkClosed { + // remove closed sinks + remove(sink) + continue + } + logrus.WithField("event", event).WithField("events.sink", sink).WithError(err). + Errorf("broadcaster: dropping event") + } + } + case request := <-b.adds: + // while we have to iterate for add/remove, common iteration for + // send is faster against slice. 
+ + var found bool + for _, sink := range b.sinks { + if request.sink == sink { + found = true + break + } + } + + if !found { + b.sinks = append(b.sinks, request.sink) + } + // b.sinks[request.sink] = struct{}{} + request.response <- nil + case request := <-b.removes: + remove(request.sink) + request.response <- nil + case closing := <-b.closed: + // close all the underlying sinks + for _, sink := range b.sinks { + if err := sink.Close(); err != nil && err != ErrSinkClosed { + logrus.WithField("events.sink", sink).WithError(err). + Errorf("broadcaster: closing sink failed") + } + } + closing <- struct{}{} + return + } + } +} diff --git a/vendor/src/github.com/docker/go-events/channel.go b/vendor/src/github.com/docker/go-events/channel.go new file mode 100644 index 0000000000..7ee7ea5e22 --- /dev/null +++ b/vendor/src/github.com/docker/go-events/channel.go @@ -0,0 +1,47 @@ +package events + +// Channel provides a sink that can be listened on. The writer and channel +// listener must operate in separate goroutines. +// +// Consumers should listen on Channel.C until Closed is closed. +type Channel struct { + C chan Event + + closed chan struct{} +} + +// NewChannel returns a channel. If buffer is non-zero, the channel is +// unbuffered. +func NewChannel(buffer int) *Channel { + return &Channel{ + C: make(chan Event, buffer), + closed: make(chan struct{}), + } +} + +// Done returns a channel that will always proceed once the sink is closed. +func (ch *Channel) Done() chan struct{} { + return ch.closed +} + +// Write the event to the channel. Must be called in a separate goroutine from +// the listener. +func (ch *Channel) Write(event Event) error { + select { + case ch.C <- event: + return nil + case <-ch.closed: + return ErrSinkClosed + } +} + +// Close the channel sink. +func (ch *Channel) Close() error { + select { + case <-ch.closed: + return ErrSinkClosed + default: + close(ch.closed) + return nil + } +} diff --git a/vendor/src/github.com/docker/go-events/errors.go b/vendor/src/github.com/docker/go-events/errors.go new file mode 100644 index 0000000000..56db7c2510 --- /dev/null +++ b/vendor/src/github.com/docker/go-events/errors.go @@ -0,0 +1,10 @@ +package events + +import "fmt" + +var ( + // ErrSinkClosed is returned if a write is issued to a sink that has been + // closed. If encountered, the error should be considered terminal and + // retries will not be successful. + ErrSinkClosed = fmt.Errorf("events: sink closed") +) diff --git a/vendor/src/github.com/docker/go-events/event.go b/vendor/src/github.com/docker/go-events/event.go new file mode 100644 index 0000000000..f0f1d9ea5f --- /dev/null +++ b/vendor/src/github.com/docker/go-events/event.go @@ -0,0 +1,15 @@ +package events + +// Event marks items that can be sent as events. +type Event interface{} + +// Sink accepts and sends events. +type Sink interface { + // Write an event to the Sink. If no error is returned, the caller will + // assume that all events have been committed to the sink. If an error is + // received, the caller may retry sending the event. + Write(event Event) error + + // Close the sink, possibly waiting for pending events to flush. + Close() error +} diff --git a/vendor/src/github.com/docker/go-events/filter.go b/vendor/src/github.com/docker/go-events/filter.go new file mode 100644 index 0000000000..f2765cfe6b --- /dev/null +++ b/vendor/src/github.com/docker/go-events/filter.go @@ -0,0 +1,52 @@ +package events + +// Matcher matches events. 
+type Matcher interface { + Match(event Event) bool +} + +// MatcherFunc implements matcher with just a function. +type MatcherFunc func(event Event) bool + +// Match calls the wrapped function. +func (fn MatcherFunc) Match(event Event) bool { + return fn(event) +} + +// Filter provides an event sink that sends only events that are accepted by a +// Matcher. No methods on filter are goroutine safe. +type Filter struct { + dst Sink + matcher Matcher + closed bool +} + +// NewFilter returns a new filter that will send to events to dst that return +// true for Matcher. +func NewFilter(dst Sink, matcher Matcher) Sink { + return &Filter{dst: dst, matcher: matcher} +} + +// Write an event to the filter. +func (f *Filter) Write(event Event) error { + if f.closed { + return ErrSinkClosed + } + + if f.matcher.Match(event) { + return f.dst.Write(event) + } + + return nil +} + +// Close the filter and allow no more events to pass through. +func (f *Filter) Close() error { + // TODO(stevvooe): Not all sinks should have Close. + if f.closed { + return ErrSinkClosed + } + + f.closed = true + return f.dst.Close() +} diff --git a/vendor/src/github.com/docker/go-events/queue.go b/vendor/src/github.com/docker/go-events/queue.go new file mode 100644 index 0000000000..7c5fc8150e --- /dev/null +++ b/vendor/src/github.com/docker/go-events/queue.go @@ -0,0 +1,104 @@ +package events + +import ( + "container/list" + "sync" + + "github.com/Sirupsen/logrus" +) + +// Queue accepts all messages into a queue for asynchronous consumption +// by a sink. It is unbounded and thread safe but the sink must be reliable or +// events will be dropped. +type Queue struct { + dst Sink + events *list.List + cond *sync.Cond + mu sync.Mutex + closed bool +} + +// NewQueue returns a queue to the provided Sink dst. +func NewQueue(dst Sink) *Queue { + eq := Queue{ + dst: dst, + events: list.New(), + } + + eq.cond = sync.NewCond(&eq.mu) + go eq.run() + return &eq +} + +// Write accepts the events into the queue, only failing if the queue has +// beend closed. +func (eq *Queue) Write(event Event) error { + eq.mu.Lock() + defer eq.mu.Unlock() + + if eq.closed { + return ErrSinkClosed + } + + eq.events.PushBack(event) + eq.cond.Signal() // signal waiters + + return nil +} + +// Close shutsdown the event queue, flushing +func (eq *Queue) Close() error { + eq.mu.Lock() + defer eq.mu.Unlock() + + if eq.closed { + return ErrSinkClosed + } + + // set closed flag + eq.closed = true + eq.cond.Signal() // signal flushes queue + eq.cond.Wait() // wait for signal from last flush + return eq.dst.Close() +} + +// run is the main goroutine to flush events to the target sink. +func (eq *Queue) run() { + for { + event := eq.next() + + if event == nil { + return // nil block means event queue is closed. + } + + if err := eq.dst.Write(event); err != nil { + logrus.WithFields(logrus.Fields{ + "event": event, + "sink": eq.dst, + }).WithError(err).Warnf("eventqueue: dropped event") + } + } +} + +// next encompasses the critical section of the run loop. When the queue is +// empty, it will block on the condition. If new data arrives, it will wake +// and return a block. When closed, a nil slice will be returned. 
+func (eq *Queue) next() Event { + eq.mu.Lock() + defer eq.mu.Unlock() + + for eq.events.Len() < 1 { + if eq.closed { + eq.cond.Broadcast() + return nil + } + + eq.cond.Wait() + } + + front := eq.events.Front() + block := front.Value.(Event) + eq.events.Remove(front) + + return block +} diff --git a/vendor/src/github.com/docker/go-events/retry.go b/vendor/src/github.com/docker/go-events/retry.go new file mode 100644 index 0000000000..501deeb55f --- /dev/null +++ b/vendor/src/github.com/docker/go-events/retry.go @@ -0,0 +1,168 @@ +package events + +import ( + "sync" + "time" + + "github.com/Sirupsen/logrus" +) + +// RetryingSink retries the write until success or an ErrSinkClosed is +// returned. Underlying sink must have p > 0 of succeeding or the sink will +// block. Retry is configured with a RetryStrategy. Concurrent calls to a +// retrying sink are serialized through the sink, meaning that if one is +// in-flight, another will not proceed. +type RetryingSink struct { + sink Sink + strategy RetryStrategy + closed chan struct{} +} + +// NewRetryingSink returns a sink that will retry writes to a sink, backing +// off on failure. Parameters threshold and backoff adjust the behavior of the +// circuit breaker. +func NewRetryingSink(sink Sink, strategy RetryStrategy) *RetryingSink { + rs := &RetryingSink{ + sink: sink, + strategy: strategy, + closed: make(chan struct{}), + } + + return rs +} + +// Write attempts to flush the events to the downstream sink until it succeeds +// or the sink is closed. +func (rs *RetryingSink) Write(event Event) error { + logger := logrus.WithField("event", event) + var timer *time.Timer + +retry: + select { + case <-rs.closed: + return ErrSinkClosed + default: + } + + if backoff := rs.strategy.Proceed(event); backoff > 0 { + if timer == nil { + timer = time.NewTimer(backoff) + defer timer.Stop() + } else { + timer.Reset(backoff) + } + + select { + case <-timer.C: + goto retry + case <-rs.closed: + return ErrSinkClosed + } + } + + if err := rs.sink.Write(event); err != nil { + if err == ErrSinkClosed { + // terminal! + return err + } + + logger := logger.WithError(err) // shadow!! + + if rs.strategy.Failure(event, err) { + logger.Errorf("retryingsink: dropped event") + return nil + } + + logger.Errorf("retryingsink: error writing event, retrying") + goto retry + } + + rs.strategy.Success(event) + return nil +} + +// Close closes the sink and the underlying sink. +func (rs *RetryingSink) Close() error { + select { + case <-rs.closed: + return ErrSinkClosed + default: + close(rs.closed) + return rs.sink.Close() + } +} + +// RetryStrategy defines a strategy for retrying event sink writes. +// +// All methods should be goroutine safe. +type RetryStrategy interface { + // Proceed is called before every event send. If proceed returns a + // positive, non-zero integer, the retryer will back off by the provided + // duration. + // + // An event is provided, by may be ignored. + Proceed(event Event) time.Duration + + // Failure reports a failure to the strategy. If this method returns true, + // the event should be dropped. + Failure(event Event, err error) bool + + // Success should be called when an event is sent successfully. + Success(event Event) +} + +// TODO(stevvooe): We are using circuit breaker here. May want to provide +// bounded exponential backoff, as well. + +// Breaker implements a circuit breaker retry strategy. +// +// The current implementation never drops events. 
+type Breaker struct { + threshold int + recent int + last time.Time + backoff time.Duration // time after which we retry after failure. + mu sync.Mutex +} + +var _ RetryStrategy = &Breaker{} + +// NewBreaker returns a breaker that will backoff after the threshold has been +// tripped. A Breaker is thread safe and may be shared by many goroutines. +func NewBreaker(threshold int, backoff time.Duration) *Breaker { + return &Breaker{ + threshold: threshold, + backoff: backoff, + } +} + +// Proceed checks the failures against the threshold. +func (b *Breaker) Proceed(event Event) time.Duration { + b.mu.Lock() + defer b.mu.Unlock() + + if b.recent < b.threshold { + return 0 + } + + return b.last.Add(b.backoff).Sub(time.Now()) +} + +// Success resets the breaker. +func (b *Breaker) Success(event Event) { + b.mu.Lock() + defer b.mu.Unlock() + + b.recent = 0 + b.last = time.Time{} +} + +// Failure records the failure and latest failure time. +func (b *Breaker) Failure(event Event, err error) bool { + b.mu.Lock() + defer b.mu.Unlock() + + b.recent++ + b.last = time.Now().UTC() + return false // never drop events. +} diff --git a/vendor/src/github.com/docker/libkv/.travis.yml b/vendor/src/github.com/docker/libkv/.travis.yml index a9ef7c82bf..f7cecbdf9c 100644 --- a/vendor/src/github.com/docker/libkv/.travis.yml +++ b/vendor/src/github.com/docker/libkv/.travis.yml @@ -1,9 +1,7 @@ language: go go: - - 1.3 -# - 1.4 -# see https://github.com/moovweb/gvm/pull/116 for why Go 1.4 is currently disabled + - 1.5.3 # let us have speedy Docker-based Travis workers sudo: false @@ -11,19 +9,18 @@ sudo: false before_install: # Symlink below is needed for Travis CI to work correctly on personal forks of libkv - ln -s $HOME/gopath/src/github.com/${TRAVIS_REPO_SLUG///libkv/} $HOME/gopath/src/github.com/docker - - go get golang.org/x/tools/cmd/vet - go get golang.org/x/tools/cmd/cover - go get github.com/mattn/goveralls - go get github.com/golang/lint/golint - go get github.com/GeertJohan/fgt before_script: - - script/travis_consul.sh 0.5.2 - - script/travis_etcd.sh 2.2.0 - - script/travis_zk.sh 3.4.6 + - script/travis_consul.sh 0.6.3 + - script/travis_etcd.sh 2.2.5 + - script/travis_zk.sh 3.5.1-alpha script: - - ./consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul -config-file=./config.json 1>/dev/null & + - ./consul agent -server -bootstrap -advertise=127.0.0.1 -data-dir /tmp/consul -config-file=./config.json 1>/dev/null & - ./etcd/etcd --listen-client-urls 'http://0.0.0.0:4001' --advertise-client-urls 'http://127.0.0.1:4001' >/dev/null 2>&1 & - ./zk/bin/zkServer.sh start ./zk/conf/zoo.cfg 1> /dev/null - script/validate-gofmt diff --git a/vendor/src/github.com/docker/libkv/LICENSE.code b/vendor/src/github.com/docker/libkv/LICENSE.code index 9e4bd4dbee..34c4ea7c50 100644 --- a/vendor/src/github.com/docker/libkv/LICENSE.code +++ b/vendor/src/github.com/docker/libkv/LICENSE.code @@ -176,7 +176,7 @@ END OF TERMS AND CONDITIONS - Copyright 2014-2015 Docker, Inc. + Copyright 2014-2016 Docker, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/vendor/src/github.com/docker/libkv/MAINTAINERS b/vendor/src/github.com/docker/libkv/MAINTAINERS new file mode 100644 index 0000000000..4dd59c7e27 --- /dev/null +++ b/vendor/src/github.com/docker/libkv/MAINTAINERS @@ -0,0 +1,46 @@ +# Libkv maintainers file +# +# This file describes who runs the docker/libkv project and how. 
+# This is a living document - if you see something out of date or missing, speak up! +# +# It is structured to be consumable by both humans and programs. +# To extract its contents programmatically, use any TOML-compliant parser. +# +# This file is compiled into the MAINTAINERS file in docker/opensource. +# +[Org] + [Org."Core maintainers"] + people = [ + "abronan", + "aluzzardi", + "sanimej", + "vieux", + ] + +[people] + +# A reference list of all people associated with the project. +# All other sections should refer to people by their canonical key +# in the people section. + + # ADD YOURSELF HERE IN ALPHABETICAL ORDER + + [people.abronan] + Name = "Alexandre Beslic" + Email = "abronan@docker.com" + GitHub = "abronan" + + [people.aluzzardi] + Name = "Andrea Luzzardi" + Email = "al@docker.com" + GitHub = "aluzzardi" + + [people.sanimej] + Name = "Santhosh Manohar" + Email = "santhosh@docker.com" + GitHub = "sanimej" + + [people.vieux] + Name = "Victor Vieux" + Email = "vieux@docker.com" + GitHub = "vieux" diff --git a/vendor/src/github.com/docker/libkv/README.md b/vendor/src/github.com/docker/libkv/README.md index 02cf3fd0ea..baad81b4f8 100644 --- a/vendor/src/github.com/docker/libkv/README.md +++ b/vendor/src/github.com/docker/libkv/README.md @@ -3,6 +3,7 @@ [![GoDoc](https://godoc.org/github.com/docker/libkv?status.png)](https://godoc.org/github.com/docker/libkv) [![Build Status](https://travis-ci.org/docker/libkv.svg?branch=master)](https://travis-ci.org/docker/libkv) [![Coverage Status](https://coveralls.io/repos/docker/libkv/badge.svg)](https://coveralls.io/r/docker/libkv) +[![Go Report Card](https://goreportcard.com/badge/github.com/docker/libkv)](https://goreportcard.com/report/github.com/docker/libkv) `libkv` provides a `Go` native library to store metadata. @@ -10,7 +11,7 @@ The goal of `libkv` is to abstract common store operations for multiple distribu For example, you can use it to store your metadata or for service discovery to register machines and endpoints inside your cluster. -You can also easily implement a generic *Leader Election* on top of it (see the [swarm/leadership](https://github.com/docker/swarm/tree/master/leadership) package). +You can also easily implement a generic *Leader Election* on top of it (see the [docker/leadership](https://github.com/docker/leadership) repository). As of now, `libkv` offers support for `Consul`, `Etcd`, `Zookeeper` (**Distributed** store) and `BoltDB` (**Local** store). @@ -30,7 +31,7 @@ You can find examples of usage for `libkv` under in `docs/examples.go`. Optional `libkv` supports: - Consul versions >= `0.5.1` because it uses Sessions with `Delete` behavior for the use of `TTLs` (mimics zookeeper's Ephemeral node support), If you don't plan to use `TTLs`: you can use Consul version `0.4.0+`. -- Etcd versions >= `2.0` because it uses the new `coreos/etcd/client`, this might change in the future as the support for `APIv3` comes along and adds mor capabilities. +- Etcd versions >= `2.0` because it uses the new `coreos/etcd/client`, this might change in the future as the support for `APIv3` comes along and adds more capabilities. - Zookeeper versions >= `3.4.5`. Although this might work with previous version but this remains untested as of now. - Boltdb, which shouldn't be subject to any version dependencies. @@ -83,7 +84,7 @@ Please refer to the `docs/compatibility.md` to see what are the special cases fo Other than those special cases, you should expect the same experience for basic operations like `Get`/`Put`, etc. 
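A hedged sketch of the basic flow the compatibility notes above refer to, using the Consul backend; the agent address, key, and value are assumptions for illustration.

```go
package main

import (
	"log"
	"time"

	"github.com/docker/libkv"
	"github.com/docker/libkv/store"
	"github.com/docker/libkv/store/consul"
)

func main() {
	// Register the Consul backend so libkv.NewStore can construct it.
	consul.Register()

	// 127.0.0.1:8500 is an assumed local Consul agent.
	kv, err := libkv.NewStore(
		store.CONSUL,
		[]string{"127.0.0.1:8500"},
		&store.Config{ConnectionTimeout: 10 * time.Second},
	)
	if err != nil {
		log.Fatalf("cannot create store: %v", err)
	}

	// Get/Put behave the same way across Consul, Etcd, Zookeeper and BoltDB.
	if err := kv.Put("some/key", []byte("hello"), nil); err != nil {
		log.Fatalf("put failed: %v", err)
	}
	pair, err := kv.Get("some/key")
	if err != nil {
		log.Fatalf("get failed: %v", err)
	}
	log.Printf("%s = %s (index %d)", pair.Key, pair.Value, pair.LastIndex)
}
```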
-Calls like `WatchTree` may return different events (or number of events) depending on the backend (for now, `Etcd` and `Consul` will likely return more events than `Zookeeper` that you should triage properly). Although you should be able to use it successfully to watch on events in an interchangeable way (see the **swarm/leadership** or **swarm/discovery** packages in **docker/swarm**). +Calls like `WatchTree` may return different events (or number of events) depending on the backend (for now, `Etcd` and `Consul` will likely return more events than `Zookeeper` that you should triage properly). Although you should be able to use it successfully to watch on events in an interchangeable way (see the **docker/leadership** repository or the **pkg/discovery/kv** package in **docker/docker**). ## TLS @@ -103,4 +104,4 @@ Want to hack on libkv? [Docker's contributions guidelines](https://github.com/do ##Copyright and license -Copyright © 2014-2015 Docker, Inc. All rights reserved, except as follows. Code is released under the Apache 2.0 license. The README.md file, and files in the "docs" folder are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file "LICENSE.docs". You may obtain a duplicate copy of the same license, titled CC-BY-SA-4.0, at http://creativecommons.org/licenses/by/4.0/. +Copyright © 2014-2016 Docker, Inc. All rights reserved, except as follows. Code is released under the Apache 2.0 license. The README.md file, and files in the "docs" folder are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file "LICENSE.docs". You may obtain a duplicate copy of the same license, titled CC-BY-SA-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/vendor/src/github.com/docker/libkv/libkv.go b/vendor/src/github.com/docker/libkv/libkv.go index 2192cd14e8..bdb8c7529f 100644 --- a/vendor/src/github.com/docker/libkv/libkv.go +++ b/vendor/src/github.com/docker/libkv/libkv.go @@ -25,7 +25,7 @@ var ( }() ) -// NewStore creates a an instance of store +// NewStore creates an instance of store func NewStore(backend store.Backend, addrs []string, options *store.Config) (store.Store, error) { if init, exists := initializers[backend]; exists { return init(addrs, options) diff --git a/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go b/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go index 94d01b4b09..4026e0a20c 100644 --- a/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go +++ b/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go @@ -19,8 +19,6 @@ var ( // ErrMultipleEndpointsUnsupported is thrown when multiple endpoints specified for // BoltDB. 
Endpoint has to be a local file path ErrMultipleEndpointsUnsupported = errors.New("boltdb supports one endpoint and should be a file path") - // ErrBoltBucketNotFound is thrown when specified BoltBD bucket doesn't exist in the DB - ErrBoltBucketNotFound = errors.New("boltdb bucket doesn't exist") // ErrBoltBucketOptionMissing is thrown when boltBcuket config option is missing ErrBoltBucketOptionMissing = errors.New("boltBucket config option missing") ) @@ -141,7 +139,7 @@ func (b *BoltDB) Get(key string) (*store.KVPair, error) { err = db.View(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } v := bucket.Get([]byte(key)) @@ -217,7 +215,7 @@ func (b *BoltDB) Delete(key string) error { err = db.Update(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } err := bucket.Delete([]byte(key)) return err @@ -243,7 +241,7 @@ func (b *BoltDB) Exists(key string) (bool, error) { err = db.View(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } val = bucket.Get([]byte(key)) @@ -276,7 +274,7 @@ func (b *BoltDB) List(keyPrefix string) ([]*store.KVPair, error) { err = db.View(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } cursor := bucket.Cursor() @@ -326,7 +324,7 @@ func (b *BoltDB) AtomicDelete(key string, previous *store.KVPair) (bool, error) err = db.Update(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } val = bucket.Get([]byte(key)) @@ -370,7 +368,7 @@ func (b *BoltDB) AtomicPut(key string, value []byte, previous *store.KVPair, opt bucket := tx.Bucket(b.boltBucket) if bucket == nil { if previous != nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } bucket, err = tx.CreateBucket(b.boltBucket) if err != nil { @@ -381,7 +379,7 @@ func (b *BoltDB) AtomicPut(key string, value []byte, previous *store.KVPair, opt // doesn't exist in the DB. 
val = bucket.Get([]byte(key)) if previous == nil && len(val) != 0 { - return store.ErrKeyModified + return store.ErrKeyExists } if previous != nil { if len(val) == 0 { @@ -440,7 +438,7 @@ func (b *BoltDB) DeleteTree(keyPrefix string) error { err = db.Update(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } cursor := bucket.Cursor() diff --git a/vendor/src/github.com/docker/libkv/store/consul/consul.go b/vendor/src/github.com/docker/libkv/store/consul/consul.go index c7693ca441..cb64be72d6 100644 --- a/vendor/src/github.com/docker/libkv/store/consul/consul.go +++ b/vendor/src/github.com/docker/libkv/store/consul/consul.go @@ -22,6 +22,14 @@ const ( // RenewSessionRetryMax is the number of time we should try // to renew the session before giving up and throwing an error RenewSessionRetryMax = 5 + + // MaxSessionDestroyAttempts is the maximum times we will try + // to explicitely destroy the session attached to a lock after + // the connectivity to the store has been lost + MaxSessionDestroyAttempts = 5 + + // defaultLockTTL is the default ttl for the consul lock + defaultLockTTL = 20 * time.Second ) var ( @@ -186,6 +194,7 @@ func (s *Consul) Put(key string, value []byte, opts *store.WriteOptions) error { p := &api.KVPair{ Key: key, Value: value, + Flags: api.LockFlagValue, } if opts != nil && opts.TTL > 0 { @@ -378,44 +387,99 @@ func (s *Consul) NewLock(key string, options *store.LockOptions) (store.Locker, lock := &consulLock{} + ttl := defaultLockTTL + if options != nil { // Set optional TTL on Lock if options.TTL != 0 { - entry := &api.SessionEntry{ - Behavior: api.SessionBehaviorRelease, // Release the lock when the session expires - TTL: (options.TTL / 2).String(), // Consul multiplies the TTL by 2x - LockDelay: 1 * time.Millisecond, // Virtually disable lock delay - } - - // Create the key session - session, _, err := s.client.Session().Create(entry, nil) - if err != nil { - return nil, err - } - - // Place the session on lock - lockOpts.Session = session - - // Renew the session ttl lock periodically - go s.client.Session().RenewPeriodic(entry.TTL, session, nil, options.RenewLock) - lock.renewCh = options.RenewLock + ttl = options.TTL } - // Set optional value on Lock if options.Value != nil { lockOpts.Value = options.Value } } + entry := &api.SessionEntry{ + Behavior: api.SessionBehaviorRelease, // Release the lock when the session expires + TTL: (ttl / 2).String(), // Consul multiplies the TTL by 2x + LockDelay: 1 * time.Millisecond, // Virtually disable lock delay + } + + // Create the key session + session, _, err := s.client.Session().Create(entry, nil) + if err != nil { + return nil, err + } + + // Place the session and renew chan on lock + lockOpts.Session = session + lock.renewCh = options.RenewLock + l, err := s.client.LockOpts(lockOpts) if err != nil { return nil, err } + // Renew the session ttl lock periodically + s.renewLockSession(entry.TTL, session, options.RenewLock) + lock.lock = l return lock, nil } +// renewLockSession is used to renew a session Lock, it takes +// a stopRenew chan which is used to explicitely stop the session +// renew process. The renew routine never stops until a signal is +// sent to this channel. 
If deleting the session fails because the +// connection to the store is lost, it keeps trying to delete the +// session periodically until it can contact the store, this ensures +// that the lock is not maintained indefinitely which ensures liveness +// over safety for the lock when the store becomes unavailable. +func (s *Consul) renewLockSession(initialTTL string, id string, stopRenew chan struct{}) { + sessionDestroyAttempts := 0 + ttl, err := time.ParseDuration(initialTTL) + if err != nil { + return + } + go func() { + for { + select { + case <-time.After(ttl / 2): + entry, _, err := s.client.Session().Renew(id, nil) + if err != nil { + // If an error occurs, continue until the + // session gets destroyed explicitely or + // the session ttl times out + continue + } + if entry == nil { + return + } + + // Handle the server updating the TTL + ttl, _ = time.ParseDuration(entry.TTL) + + case <-stopRenew: + // Attempt a session destroy + _, err := s.client.Session().Destroy(id, nil) + if err == nil { + return + } + + if sessionDestroyAttempts >= MaxSessionDestroyAttempts { + return + } + + // We can't destroy the session because the store + // is unavailable, wait for the session renew period + sessionDestroyAttempts++ + time.Sleep(ttl / 2) + } + } + }() +} + // Lock attempts to acquire the lock and blocks while // doing so. It returns a channel that is closed if our // lock is lost or if an error occurs @@ -436,7 +500,7 @@ func (l *consulLock) Unlock() error { // modified in the meantime, throws an error if this is the case func (s *Consul) AtomicPut(key string, value []byte, previous *store.KVPair, options *store.WriteOptions) (bool, *store.KVPair, error) { - p := &api.KVPair{Key: s.normalize(key), Value: value} + p := &api.KVPair{Key: s.normalize(key), Value: value, Flags: api.LockFlagValue} if previous == nil { // Consul interprets ModifyIndex = 0 as new key. 
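To show how the session and renewal machinery above is driven from the caller's side, here is a hedged sketch of taking a lock through the generic store API; it assumes a store.Store built as in the earlier libkv sketch, and the key name and payload are illustrative.

```go
package example

import (
	"log"
	"time"

	"github.com/docker/libkv/store"
)

// runWithLock sketches the lock/session lifecycle: the TTL below becomes the
// Consul session TTL, which the store renews roughly every TTL/2 until
// stopRenew is closed.
func runWithLock(kv store.Store) error {
	stopRenew := make(chan struct{})

	lock, err := kv.NewLock("locks/leader", &store.LockOptions{
		Value:     []byte("node-1"), // assumed lock payload
		TTL:       20 * time.Second,
		RenewLock: stopRenew,
	})
	if err != nil {
		return err
	}

	// Lock blocks until the lock is acquired; lostCh is closed if it is lost later.
	lostCh, err := lock.Lock(nil)
	if err != nil {
		return err
	}

	// ... do work while holding the lock, watching lostCh ...
	_ = lostCh

	// Release the lock, then stop renewal so the session can be destroyed.
	if err := lock.Unlock(); err != nil {
		log.Printf("unlock: %v", err)
	}
	close(stopRenew)
	return nil
}
```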
@@ -445,9 +509,14 @@ func (s *Consul) AtomicPut(key string, value []byte, previous *store.KVPair, opt p.ModifyIndex = previous.LastIndex } - if work, _, err := s.client.KV().CAS(p, nil); err != nil { + ok, _, err := s.client.KV().CAS(p, nil) + if err != nil { return false, nil, err - } else if !work { + } + if !ok { + if previous == nil { + return false, nil, store.ErrKeyExists + } return false, nil, store.ErrKeyModified } @@ -466,7 +535,7 @@ func (s *Consul) AtomicDelete(key string, previous *store.KVPair) (bool, error) return false, store.ErrPreviousNotSpecified } - p := &api.KVPair{Key: s.normalize(key), ModifyIndex: previous.LastIndex} + p := &api.KVPair{Key: s.normalize(key), ModifyIndex: previous.LastIndex, Flags: api.LockFlagValue} // Extra Get operation to check on the key _, err := s.Get(key) diff --git a/vendor/src/github.com/docker/libkv/store/etcd/etcd.go b/vendor/src/github.com/docker/libkv/store/etcd/etcd.go index 312bb0b65a..c932ca665e 100644 --- a/vendor/src/github.com/docker/libkv/store/etcd/etcd.go +++ b/vendor/src/github.com/docker/libkv/store/etcd/etcd.go @@ -75,6 +75,9 @@ func New(addrs []string, options *store.Config) (store.Store, error) { if options.ConnectionTimeout != 0 { setTimeout(cfg, options.ConnectionTimeout) } + if options.Username != "" { + setCredentials(cfg, options.Username, options.Password) + } } c, err := etcd.New(*cfg) @@ -119,6 +122,12 @@ func setTimeout(cfg *etcd.Config, time time.Duration) { cfg.HeaderTimeoutPerRequest = time } +// setCredentials sets the username/password credentials for connecting to Etcd +func setCredentials(cfg *etcd.Config, username, password string) { + cfg.Username = username + cfg.Password = password +} + // Normalize the key for usage in Etcd func (s *Etcd) normalize(key string) string { key = store.Normalize(key) @@ -335,6 +344,10 @@ func (s *Etcd) AtomicPut(key string, value []byte, previous *store.KVPair, opts if etcdError.Code == etcd.ErrorCodeTestFailed { return false, nil, store.ErrKeyModified } + // Node exists error (when PrevNoExist) + if etcdError.Code == etcd.ErrorCodeNodeExist { + return false, nil, store.ErrKeyExists + } } return false, nil, err } @@ -508,15 +521,15 @@ func (l *etcdLock) Lock(stopChan chan struct{}) (<-chan struct{}, error) { // Wait for the key to be available or for // a signal to stop trying to lock the key select { - case _ = <-free: + case <-free: break case err := <-errorCh: return nil, err - case _ = <-stopChan: + case <-stopChan: return nil, ErrAbortTryLock } - // Delete or Expire event occured + // Delete or Expire event occurred // Retry } } diff --git a/vendor/src/github.com/docker/libkv/store/store.go b/vendor/src/github.com/docker/libkv/store/store.go index 0df01b6c83..7a4850c019 100644 --- a/vendor/src/github.com/docker/libkv/store/store.go +++ b/vendor/src/github.com/docker/libkv/store/store.go @@ -35,6 +35,8 @@ var ( ErrKeyNotFound = errors.New("Key not found in store") // ErrPreviousNotSpecified is thrown when the previous value is not specified for an atomic operation ErrPreviousNotSpecified = errors.New("Previous K/V pair should be provided for the Atomic operation") + // ErrKeyExists is thrown when the previous value exists in the case of an AtomicPut + ErrKeyExists = errors.New("Previous K/V pair exists, cannot complete Atomic operation") ) // Config contains the options for a storage client @@ -44,6 +46,8 @@ type Config struct { ConnectionTimeout time.Duration Bucket string PersistConnection bool + Username string + Password string } // ClientTLSConfig contains data for 
a Client TLS configuration in the form diff --git a/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go b/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go index 502b1c6e85..8a44ad318a 100644 --- a/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go +++ b/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go @@ -291,8 +291,8 @@ func (s *Zookeeper) DeleteTree(directory string) error { // AtomicPut put a value at "key" if the key has not been // modified in the meantime, throws an error if this is the case func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair, _ *store.WriteOptions) (bool, *store.KVPair, error) { - var lastIndex uint64 + if previous != nil { meta, err := s.client.Set(s.normalize(key), value, int32(previous.LastIndex)) if err != nil { @@ -307,8 +307,9 @@ func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair, // Interpret previous == nil as create operation. _, err := s.client.Create(s.normalize(key), value, 0, zk.WorldACL(zk.PermAll)) if err != nil { - // Zookeeper will complain if the directory doesn't exist. + // Directory does not exist if err == zk.ErrNoNode { + // Create the directory parts := store.SplitKey(strings.TrimSuffix(key, "/")) parts = parts[:len(parts)-1] @@ -316,11 +317,22 @@ func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair, // Failed to create the directory. return false, nil, err } + + // Create the node if _, err := s.client.Create(s.normalize(key), value, 0, zk.WorldACL(zk.PermAll)); err != nil { + // Node exist error (when previous nil) + if err == zk.ErrNodeExists { + return false, nil, store.ErrKeyExists + } return false, nil, err } } else { + // Node Exists error (when previous nil) + if err == zk.ErrNodeExists { + return false, nil, store.ErrKeyExists + } + // Unhandled error return false, nil, err } diff --git a/vendor/src/github.com/hashicorp/go-multierror/LICENSE b/vendor/src/github.com/hashicorp/go-multierror/LICENSE new file mode 100644 index 0000000000..82b4de97c7 --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/LICENSE @@ -0,0 +1,353 @@ +Mozilla Public License, version 2.0 + +1. Definitions + +1.1. “Contributor” + + means each individual or legal entity that creates, contributes to the + creation of, or owns Covered Software. + +1.2. “Contributor Version” + + means the combination of the Contributions of others (if any) used by a + Contributor and that particular Contributor’s Contribution. + +1.3. “Contribution” + + means Covered Software of a particular Contributor. + +1.4. “Covered Software” + + means Source Code Form to which the initial Contributor has attached the + notice in Exhibit A, the Executable Form of such Source Code Form, and + Modifications of such Source Code Form, in each case including portions + thereof. + +1.5. “Incompatible With Secondary Licenses” + means + + a. that the initial Contributor has attached the notice described in + Exhibit B to the Covered Software; or + + b. that the Covered Software was made available under the terms of version + 1.1 or earlier of the License, but not also under the terms of a + Secondary License. + +1.6. “Executable Form” + + means any form of the work other than Source Code Form. + +1.7. “Larger Work” + + means a work that combines Covered Software with other material, in a separate + file or files, that is not Covered Software. + +1.8. “License” + + means this document. + +1.9. 
“Licensable” + + means having the right to grant, to the maximum extent possible, whether at the + time of the initial grant or subsequently, any and all of the rights conveyed by + this License. + +1.10. “Modifications” + + means any of the following: + + a. any file in Source Code Form that results from an addition to, deletion + from, or modification of the contents of Covered Software; or + + b. any new file in Source Code Form that contains any Covered Software. + +1.11. “Patent Claims” of a Contributor + + means any patent claim(s), including without limitation, method, process, + and apparatus claims, in any patent Licensable by such Contributor that + would be infringed, but for the grant of the License, by the making, + using, selling, offering for sale, having made, import, or transfer of + either its Contributions or its Contributor Version. + +1.12. “Secondary License” + + means either the GNU General Public License, Version 2.0, the GNU Lesser + General Public License, Version 2.1, the GNU Affero General Public + License, Version 3.0, or any later versions of those licenses. + +1.13. “Source Code Form” + + means the form of the work preferred for making modifications. + +1.14. “You” (or “Your”) + + means an individual or a legal entity exercising rights under this + License. For legal entities, “You” includes any entity that controls, is + controlled by, or is under common control with You. For purposes of this + definition, “control” means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by contract or + otherwise, or (b) ownership of more than fifty percent (50%) of the + outstanding shares or beneficial ownership of such entity. + + +2. License Grants and Conditions + +2.1. Grants + + Each Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + a. under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or as + part of a Larger Work; and + + b. under Patent Claims of such Contributor to make, use, sell, offer for + sale, have made, import, and otherwise transfer either its Contributions + or its Contributor Version. + +2.2. Effective Date + + The licenses granted in Section 2.1 with respect to any Contribution become + effective for each Contribution on the date the Contributor first distributes + such Contribution. + +2.3. Limitations on Grant Scope + + The licenses granted in this Section 2 are the only rights granted under this + License. No additional rights or licenses will be implied from the distribution + or licensing of Covered Software under this License. Notwithstanding Section + 2.1(b) above, no patent license is granted by a Contributor: + + a. for any code that a Contributor has removed from Covered Software; or + + b. for infringements caused by: (i) Your and any other third party’s + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + + c. under Patent Claims infringed by Covered Software in the absence of its + Contributions. + + This License does not grant any rights in the trademarks, service marks, or + logos of any Contributor (except as may be necessary to comply with the + notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + + No Contributor makes additional grants as a result of Your choice to + distribute the Covered Software under a subsequent version of this License + (see Section 10.2) or under the terms of a Secondary License (if permitted + under the terms of Section 3.3). + +2.5. Representation + + Each Contributor represents that the Contributor believes its Contributions + are its original creation(s) or it has sufficient rights to grant the + rights to its Contributions conveyed by this License. + +2.6. Fair Use + + This License is not intended to limit any rights You have under applicable + copyright doctrines of fair use, fair dealing, or other equivalents. + +2.7. Conditions + + Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in + Section 2.1. + + +3. Responsibilities + +3.1. Distribution of Source Form + + All distribution of Covered Software in Source Code Form, including any + Modifications that You create or to which You contribute, must be under the + terms of this License. You must inform recipients that the Source Code Form + of the Covered Software is governed by the terms of this License, and how + they can obtain a copy of this License. You may not attempt to alter or + restrict the recipients’ rights in the Source Code Form. + +3.2. Distribution of Executable Form + + If You distribute Covered Software in Executable Form then: + + a. such Covered Software must also be made available in Source Code Form, + as described in Section 3.1, and You must inform recipients of the + Executable Form how they can obtain a copy of such Source Code Form by + reasonable means in a timely manner, at a charge no more than the cost + of distribution to the recipient; and + + b. You may distribute such Executable Form under the terms of this License, + or sublicense it under different terms, provided that the license for + the Executable Form does not attempt to limit or alter the recipients’ + rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + + You may create and distribute a Larger Work under terms of Your choice, + provided that You also comply with the requirements of this License for the + Covered Software. If the Larger Work is a combination of Covered Software + with a work governed by one or more Secondary Licenses, and the Covered + Software is not Incompatible With Secondary Licenses, this License permits + You to additionally distribute such Covered Software under the terms of + such Secondary License(s), so that the recipient of the Larger Work may, at + their option, further distribute the Covered Software under the terms of + either this License or such Secondary License(s). + +3.4. Notices + + You may not remove or alter the substance of any license notices (including + copyright notices, patent notices, disclaimers of warranty, or limitations + of liability) contained within the Source Code Form of the Covered + Software, except that You may alter any license notices to the extent + required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + + You may choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of Covered + Software. However, You may do so only on Your own behalf, and not on behalf + of any Contributor. 
You must make it absolutely clear that any such + warranty, support, indemnity, or liability obligation is offered by You + alone, and You hereby agree to indemnify every Contributor for any + liability incurred by such Contributor as a result of warranty, support, + indemnity or liability terms You offer. You may include additional + disclaimers of warranty and limitations of liability specific to any + jurisdiction. + +4. Inability to Comply Due to Statute or Regulation + + If it is impossible for You to comply with any of the terms of this License + with respect to some or all of the Covered Software due to statute, judicial + order, or regulation then You must: (a) comply with the terms of this License + to the maximum extent possible; and (b) describe the limitations and the code + they affect. Such description must be placed in a text file included with all + distributions of the Covered Software under this License. Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Termination + +5.1. The rights granted under this License will terminate automatically if You + fail to comply with any of its terms. However, if You become compliant, + then the rights granted under this License from a particular Contributor + are reinstated (a) provisionally, unless and until such Contributor + explicitly and finally terminates Your grants, and (b) on an ongoing basis, + if such Contributor fails to notify You of the non-compliance by some + reasonable means prior to 60 days after You have come back into compliance. + Moreover, Your grants from a particular Contributor are reinstated on an + ongoing basis if such Contributor notifies You of the non-compliance by + some reasonable means, this is the first time You have received notice of + non-compliance with this License from such Contributor, and You become + compliant prior to 30 days after Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent + infringement claim (excluding declaratory judgment actions, counter-claims, + and cross-claims) alleging that a Contributor Version directly or + indirectly infringes any patent, then the rights granted to You by any and + all Contributors for the Covered Software under Section 2.1 of this License + shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user + license agreements (excluding distributors and resellers) which have been + validly granted by You or Your distributors under this License prior to + termination shall survive termination. + +6. Disclaimer of Warranty + + Covered Software is provided under this License on an “as is” basis, without + warranty of any kind, either expressed, implied, or statutory, including, + without limitation, warranties that the Covered Software is free of defects, + merchantable, fit for a particular purpose or non-infringing. The entire + risk as to the quality and performance of the Covered Software is with You. + Should any Covered Software prove defective in any respect, You (not any + Contributor) assume the cost of any necessary servicing, repair, or + correction. This disclaimer of warranty constitutes an essential part of this + License. No use of any Covered Software is authorized under this License + except under this disclaimer. + +7. 
Limitation of Liability + + Under no circumstances and under no legal theory, whether tort (including + negligence), contract, or otherwise, shall any Contributor, or anyone who + distributes Covered Software as permitted above, be liable to You for any + direct, indirect, special, incidental, or consequential damages of any + character including, without limitation, damages for lost profits, loss of + goodwill, work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses, even if such party shall have been + informed of the possibility of such damages. This limitation of liability + shall not apply to liability for death or personal injury resulting from such + party’s negligence to the extent applicable law prohibits such limitation. + Some jurisdictions do not allow the exclusion or limitation of incidental or + consequential damages, so this exclusion and limitation may not apply to You. + +8. Litigation + + Any litigation relating to this License may be brought only in the courts of + a jurisdiction where the defendant maintains its principal place of business + and such litigation shall be governed by laws of that jurisdiction, without + reference to its conflict-of-law provisions. Nothing in this Section shall + prevent a party’s ability to bring cross-claims or counter-claims. + +9. Miscellaneous + + This License represents the complete agreement concerning the subject matter + hereof. If any provision of this License is held to be unenforceable, such + provision shall be reformed only to the extent necessary to make it + enforceable. Any law or regulation which provides that the language of a + contract shall be construed against the drafter shall not be used to construe + this License against a Contributor. + + +10. Versions of the License + +10.1. New Versions + + Mozilla Foundation is the license steward. Except as provided in Section + 10.3, no one other than the license steward has the right to modify or + publish new versions of this License. Each version will be given a + distinguishing version number. + +10.2. Effect of New Versions + + You may distribute the Covered Software under the terms of the version of + the License under which You originally received the Covered Software, or + under the terms of any subsequent version published by the license + steward. + +10.3. Modified Versions + + If you create software not governed by this License, and you want to + create a new license for such software, you may create and use a modified + version of this License if you rename the license and remove any + references to the name of the license steward (except to note that such + modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses + If You choose to distribute Source Code Form that is Incompatible With + Secondary Licenses under the terms of this version of the License, the + notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice + + This Source Code Form is subject to the + terms of the Mozilla Public License, v. + 2.0. If a copy of the MPL was not + distributed with this file, You can + obtain one at + http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular file, then +You may include the notice in a location (such as a LICENSE file in a relevant +directory) where a recipient would be likely to look for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - “Incompatible With Secondary Licenses” Notice + + This Source Code Form is “Incompatible + With Secondary Licenses”, as defined by + the Mozilla Public License, v. 2.0. diff --git a/vendor/src/github.com/hashicorp/go-multierror/README.md b/vendor/src/github.com/hashicorp/go-multierror/README.md new file mode 100644 index 0000000000..e81be50e0d --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/README.md @@ -0,0 +1,91 @@ +# go-multierror + +`go-multierror` is a package for Go that provides a mechanism for +representing a list of `error` values as a single `error`. + +This allows a function in Go to return an `error` that might actually +be a list of errors. If the caller knows this, they can unwrap the +list and access the errors. If the caller doesn't know, the error +formats to a nice human-readable format. + +`go-multierror` implements the +[errwrap](https://github.com/hashicorp/errwrap) interface so that it can +be used with that library, as well. + +## Installation and Docs + +Install using `go get github.com/hashicorp/go-multierror`. + +Full documentation is available at +http://godoc.org/github.com/hashicorp/go-multierror + +## Usage + +go-multierror is easy to use and purposely built to be unobtrusive in +existing Go applications/libraries that may not be aware of it. + +**Building a list of errors** + +The `Append` function is used to create a list of errors. This function +behaves a lot like the Go built-in `append` function: it doesn't matter +if the first argument is nil, a `multierror.Error`, or any other `error`, +the function behaves as you would expect. + +```go +var result error + +if err := step1(); err != nil { + result = multierror.Append(result, err) +} +if err := step2(); err != nil { + result = multierror.Append(result, err) +} + +return result +``` + +**Customizing the formatting of the errors** + +By specifying a custom `ErrorFormat`, you can customize the format +of the `Error() string` function: + +```go +var result *multierror.Error + +// ... accumulate errors here, maybe using Append + +if result != nil { + result.ErrorFormat = func([]error) string { + return "errors!" + } +} +``` + +**Accessing the list of errors** + +`multierror.Error` implements `error` so if the caller doesn't know about +multierror, it will work just fine. But if you're aware a multierror might +be returned, you can use type switches to access the list of errors: + +```go +if err := something(); err != nil { + if merr, ok := err.(*multierror.Error); ok { + // Use merr.Errors + } +} +``` + +**Returning a multierror only if there are errors** + +If you build a `multierror.Error`, you can use the `ErrorOrNil` function +to return an `error` implementation only if there are errors to return: + +```go +var result *multierror.Error + +// ... accumulate errors here + +// Return the `error` only if errors were added to the multierror, otherwise +// return nil since there are no errors. +return result.ErrorOrNil() +``` diff --git a/vendor/src/github.com/hashicorp/go-multierror/append.go b/vendor/src/github.com/hashicorp/go-multierror/append.go new file mode 100644 index 0000000000..8d22ee7a0e --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/append.go @@ -0,0 +1,30 @@ +package multierror + +// Append is a helper function that will append more errors +// onto an Error in order to create a larger multi-error. +// +// If err is not a multierror.Error, then it will be turned into +// one. 
If any of the errs are multierr.Error, they will be flattened +// one level into err. +func Append(err error, errs ...error) *Error { + switch err := err.(type) { + case *Error: + // Typed nils can reach here, so initialize if we are nil + if err == nil { + err = new(Error) + } + + err.Errors = append(err.Errors, errs...) + return err + default: + newErrs := make([]error, 0, len(errs)+1) + if err != nil { + newErrs = append(newErrs, err) + } + newErrs = append(newErrs, errs...) + + return &Error{ + Errors: newErrs, + } + } +} diff --git a/vendor/src/github.com/hashicorp/go-multierror/format.go b/vendor/src/github.com/hashicorp/go-multierror/format.go new file mode 100644 index 0000000000..bb65a12e74 --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/format.go @@ -0,0 +1,23 @@ +package multierror + +import ( + "fmt" + "strings" +) + +// ErrorFormatFunc is a function callback that is called by Error to +// turn the list of errors into a string. +type ErrorFormatFunc func([]error) string + +// ListFormatFunc is a basic formatter that outputs the number of errors +// that occurred along with a bullet point list of the errors. +func ListFormatFunc(es []error) string { + points := make([]string, len(es)) + for i, err := range es { + points[i] = fmt.Sprintf("* %s", err) + } + + return fmt.Sprintf( + "%d error(s) occurred:\n\n%s", + len(es), strings.Join(points, "\n")) +} diff --git a/vendor/src/github.com/hashicorp/go-multierror/multierror.go b/vendor/src/github.com/hashicorp/go-multierror/multierror.go new file mode 100644 index 0000000000..2ea0827329 --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/multierror.go @@ -0,0 +1,51 @@ +package multierror + +import ( + "fmt" +) + +// Error is an error type to track multiple errors. This is used to +// accumulate errors in cases and return them as a single "error". +type Error struct { + Errors []error + ErrorFormat ErrorFormatFunc +} + +func (e *Error) Error() string { + fn := e.ErrorFormat + if fn == nil { + fn = ListFormatFunc + } + + return fn(e.Errors) +} + +// ErrorOrNil returns an error interface if this Error represents +// a list of errors, or returns nil if the list of errors is empty. This +// function is useful at the end of accumulation to make sure that the value +// returned represents the existence of errors. +func (e *Error) ErrorOrNil() error { + if e == nil { + return nil + } + if len(e.Errors) == 0 { + return nil + } + + return e +} + +func (e *Error) GoString() string { + return fmt.Sprintf("*%#v", *e) +} + +// WrappedErrors returns the list of errors that this Error is wrapping. +// It is an implementatin of the errwrap.Wrapper interface so that +// multierror.Error can be used with that library. +// +// This method is not safe to be called concurrently and is no different +// than accessing the Errors field directly. It is implementd only to +// satisfy the errwrap.Wrapper interface. 
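A small hedged example of what the default formatting produces when errors accumulate; the error messages are made up for illustration.

```go
package main

import (
	"errors"
	"fmt"

	"github.com/hashicorp/go-multierror"
)

func main() {
	var result error
	result = multierror.Append(result, errors.New("connection to 10.0.0.1 refused"))
	result = multierror.Append(result, errors.New("connection to 10.0.0.2 timed out"))

	// With the default ListFormatFunc this prints roughly:
	//
	//   2 error(s) occurred:
	//
	//   * connection to 10.0.0.1 refused
	//   * connection to 10.0.0.2 timed out
	fmt.Println(result)
}
```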
+func (e *Error) WrappedErrors() []error { + return e.Errors +} diff --git a/vendor/src/github.com/hashicorp/memberlist/README.md b/vendor/src/github.com/hashicorp/memberlist/README.md index d55befac6e..c8a125f2ca 100644 --- a/vendor/src/github.com/hashicorp/memberlist/README.md +++ b/vendor/src/github.com/hashicorp/memberlist/README.md @@ -1,4 +1,4 @@ -# memberlist +# memberlist [![GoDoc](https://godoc.org/github.com/hashicorp/memberlist?status.png)](https://godoc.org/github.com/hashicorp/memberlist) memberlist is a [Go](http://www.golang.org) library that manages cluster membership and member failure detection using a gossip based protocol. @@ -64,7 +64,7 @@ For complete documentation, see the associated [Godoc](http://godoc.org/github.c ## Protocol memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf), -with a few minor adaptations, mostly to increase propogation speed and +with a few minor adaptations, mostly to increase propagation speed and convergence rate. A high level overview of the memberlist protocol (based on SWIM) is @@ -93,15 +93,22 @@ be disabled entirely. Failure detection is done by periodic random probing using a configurable interval. If the node fails to ack within a reasonable time (typically some multiple -of RTT), then an indirect probe is attempted. An indirect probe asks a -configurable number of random nodes to probe the same node, in case there -are network issues causing our own node to fail the probe. If both our -probe and the indirect probes fail within a reasonable time, then the -node is marked "suspicious" and this knowledge is gossiped to the cluster. -A suspicious node is still considered a member of cluster. If the suspect member -of the cluster does not disputes the suspicion within a configurable period of -time, the node is finally considered dead, and this state is then gossiped -to the cluster. +of RTT), then an indirect probe as well as a direct TCP probe are attempted. An +indirect probe asks a configurable number of random nodes to probe the same node, +in case there are network issues causing our own node to fail the probe. The direct +TCP probe is used to help identify the common situation where networking is +misconfigured to allow TCP but not UDP. Without the TCP probe, a UDP-isolated node +would think all other nodes were suspect and could cause churn in the cluster when +it attempts a TCP-based state exchange with another node. It is not desirable to +operate with only TCP connectivity because convergence will be much slower, but it +is enabled so that memberlist can detect this situation and alert operators. + +If both our probe, the indirect probes, and the direct TCP probe fail within a +configurable time, then the node is marked "suspicious" and this knowledge is +gossiped to the cluster. A suspicious node is still considered a member of +cluster. If the suspect member of the cluster does not dispute the suspicion +within a configurable period of time, the node is finally considered dead, +and this state is then gossiped to the cluster. This is a brief and incomplete description of the protocol. For a better idea, please read the @@ -111,7 +118,7 @@ in its entirety, along with the memberlist source code. ### Changes from SWIM As mentioned earlier, the memberlist protocol is based on SWIM but includes -minor changes, mostly to increase propogation speed and convergence rates. 
+minor changes, mostly to increase propagation speed and convergence rates. The changes from SWIM are noted here: @@ -127,7 +134,7 @@ The changes from SWIM are noted here: also will periodically send out dedicated gossip messages on their own. This feature lets you have a higher gossip rate (for example once per 200ms) and a slower failure detection rate (such as once per second), resulting - in overall faster convergence rates and data propogation speeds. This feature + in overall faster convergence rates and data propagation speeds. This feature can be totally disabed as well, if you wish. * memberlist stores around the state of dead nodes for a set amount of time, diff --git a/vendor/src/github.com/hashicorp/memberlist/alive_delegate.go b/vendor/src/github.com/hashicorp/memberlist/alive_delegate.go new file mode 100644 index 0000000000..51a0ba9054 --- /dev/null +++ b/vendor/src/github.com/hashicorp/memberlist/alive_delegate.go @@ -0,0 +1,14 @@ +package memberlist + +// AliveDelegate is used to involve a client in processing +// a node "alive" message. When a node joins, either through +// a UDP gossip or TCP push/pull, we update the state of +// that node via an alive message. This can be used to filter +// a node out and prevent it from being considered a peer +// using application specific logic. +type AliveDelegate interface { + // NotifyMerge is invoked when a merge could take place. + // Provides a list of the nodes known by the peer. If + // the return value is non-nil, the merge is canceled. + NotifyAlive(peer *Node) error +} diff --git a/vendor/src/github.com/hashicorp/memberlist/config.go b/vendor/src/github.com/hashicorp/memberlist/config.go index bd15047273..16bebacaaa 100644 --- a/vendor/src/github.com/hashicorp/memberlist/config.go +++ b/vendor/src/github.com/hashicorp/memberlist/config.go @@ -2,6 +2,7 @@ package memberlist import ( "io" + "log" "os" "time" ) @@ -85,6 +86,11 @@ type Config struct { ProbeInterval time.Duration ProbeTimeout time.Duration + // DisableTcpPings will turn off the fallback TCP pings that are attempted + // if the direct UDP ping fails. These get pipelined along with the + // indirect UDP pings. + DisableTcpPings bool + // GossipInterval and GossipNodes are used to configure the gossip // behavior of memberlist. // @@ -111,6 +117,8 @@ type Config struct { // the first key used while attempting to decrypt messages. Providing a // value for this primary key will enable message-level encryption and // verification, and automatically install the key onto the keyring. + // The value should be either 16, 24, or 32 bytes to select AES-128, + // AES-192, or AES-256. SecretKey []byte // The keyring holds all of the encryption keys used internally. It is @@ -132,16 +140,29 @@ type Config struct { Events EventDelegate Conflict ConflictDelegate Merge MergeDelegate + Ping PingDelegate + Alive AliveDelegate + + // DNSConfigPath points to the system's DNS config file, usually located + // at /etc/resolv.conf. It can be overridden via config for easier testing. + DNSConfigPath string // LogOutput is the writer where logs should be sent. If this is not - // set, logging will go to stderr by default. + // set, logging will go to stderr by default. You cannot specify both LogOutput + // and Logger at the same time. LogOutput io.Writer + + // Logger is a custom logger which you provide. If Logger is set, it will use + // this for the internal logger. If Logger is not set, it will fall back to the + // behavior for using LogOutput. 
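For readers mapping the probe/ack behaviour described in the README onto these new fields, a hedged sketch of a tuned Config follows; tunedConfig is an illustrative helper, not part of the library, and the probe values shown simply restate the LAN defaults.

```go
package example

import (
	"log"
	"os"
	"time"

	"github.com/hashicorp/memberlist"
)

// tunedConfig shows the knobs behind failure detection: the direct UDP probe,
// the indirect probes, and the fallback TCP ping.
func tunedConfig() *memberlist.Config {
	conf := memberlist.DefaultLANConfig()

	conf.ProbeInterval = 1 * time.Second       // how often a random node is probed
	conf.ProbeTimeout = 500 * time.Millisecond // round-trip budget before escalating
	conf.IndirectChecks = 3                    // peers asked to probe on our behalf
	conf.DisableTcpPings = false               // keep the fallback TCP ping enabled

	// Logger and LogOutput are mutually exclusive; set at most one of them.
	conf.Logger = log.New(os.Stderr, "memberlist: ", log.LstdFlags)

	return conf
}
```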
You cannot specify both LogOutput and Logger + // at the same time. + Logger *log.Logger } // DefaultLANConfig returns a sane set of configurations for Memberlist. // It uses the hostname as the node name, and otherwise sets very conservative // values that are sane for most LAN environments. The default configuration -// errs on the side on the side of caution, choosing values that are optimized +// errs on the side of caution, choosing values that are optimized // for higher convergence at the cost of higher bandwidth usage. Regardless, // these values are a good starting point when getting started with memberlist. func DefaultLANConfig() *Config { @@ -152,7 +173,7 @@ func DefaultLANConfig() *Config { BindPort: 7946, AdvertiseAddr: "", AdvertisePort: 7946, - ProtocolVersion: ProtocolVersionMax, + ProtocolVersion: ProtocolVersion2Compatible, TCPTimeout: 10 * time.Second, // Timeout after 10 seconds IndirectChecks: 3, // Use 3 nodes for the indirect ping RetransmitMult: 4, // Retransmit a message 4 * log(N+1) nodes @@ -160,6 +181,7 @@ func DefaultLANConfig() *Config { PushPullInterval: 30 * time.Second, // Low frequency ProbeTimeout: 500 * time.Millisecond, // Reasonable RTT time for LAN ProbeInterval: 1 * time.Second, // Failure check every second + DisableTcpPings: false, // TCP pings are safe, even with mixed versions GossipNodes: 3, // Gossip to 3 nodes GossipInterval: 200 * time.Millisecond, // Gossip more rapidly @@ -167,8 +189,9 @@ func DefaultLANConfig() *Config { EnableCompression: true, // Enable compression by default SecretKey: nil, + Keyring: nil, - Keyring: nil, + DNSConfigPath: "/etc/resolv.conf", } } diff --git a/vendor/src/github.com/hashicorp/memberlist/delegate.go b/vendor/src/github.com/hashicorp/memberlist/delegate.go index b1204a41e8..66aa2da796 100644 --- a/vendor/src/github.com/hashicorp/memberlist/delegate.go +++ b/vendor/src/github.com/hashicorp/memberlist/delegate.go @@ -19,7 +19,8 @@ type Delegate interface { // It can return a list of buffers to send. Each buffer should assume an // overhead as provided with a limit on the total byte size allowed. // The total byte size of the resulting data to send must not exceed - // the limit. + // the limit. Care should be taken that this method does not block, + // since doing so would block the entire UDP packet receive loop. GetBroadcasts(overhead, limit int) [][]byte // LocalState is used for a TCP Push/Pull. This is sent to diff --git a/vendor/src/github.com/hashicorp/memberlist/keyring.go b/vendor/src/github.com/hashicorp/memberlist/keyring.go index ebcd2f2002..be2201d488 100644 --- a/vendor/src/github.com/hashicorp/memberlist/keyring.go +++ b/vendor/src/github.com/hashicorp/memberlist/keyring.go @@ -34,6 +34,9 @@ func (k *Keyring) init() { // keyring. If creating a keyring with multiple keys, one key must be designated // primary by passing it as the primaryKey. If the primaryKey does not exist in // the list of secondary keys, it will be automatically added at position 0. +// +// A key should be either 16, 24, or 32 bytes to select AES-128, +// AES-192, or AES-256. func NewKeyring(keys [][]byte, primaryKey []byte) (*Keyring, error) { keyring := &Keyring{} keyring.init() @@ -58,10 +61,12 @@ func NewKeyring(keys [][]byte, primaryKey []byte) (*Keyring, error) { // AddKey will install a new key on the ring. Adding a key to the ring will make // it available for use in decryption. If the key already exists on the ring, // this function will just return noop. 
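A hedged sketch of enabling gossip encryption with the key sizes documented here; encryptedConfig is an illustrative helper and the key material is a placeholder that must be identical on every member.

```go
package example

import "github.com/hashicorp/memberlist"

// encryptedConfig builds a Config with an explicit keyring. The 32-byte key
// selects AES-256; 16 or 24 bytes would select AES-128 or AES-192.
func encryptedConfig() (*memberlist.Config, error) {
	key := []byte("0123456789abcdef0123456789abcdef") // 32 bytes, placeholder only

	ring, err := memberlist.NewKeyring([][]byte{key}, key)
	if err != nil {
		return nil, err
	}

	conf := memberlist.DefaultLANConfig()
	conf.Keyring = ring
	// Alternatively, setting conf.SecretKey = key installs a single primary
	// key onto a fresh keyring and enables encryption the same way.

	return conf, nil
}
```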
+// +// key should be either 16, 24, or 32 bytes to select AES-128, +// AES-192, or AES-256. func (k *Keyring) AddKey(key []byte) error { - // Encorce 16-byte key size - if len(key) != 16 { - return fmt.Errorf("key size must be 16 bytes") + if l := len(key); l != 16 && l != 24 && l != 32 { + return fmt.Errorf("key size must be 16, 24 or 32 bytes") } // No-op if key is already installed diff --git a/vendor/src/github.com/hashicorp/memberlist/logging.go b/vendor/src/github.com/hashicorp/memberlist/logging.go new file mode 100644 index 0000000000..f31acfb2fa --- /dev/null +++ b/vendor/src/github.com/hashicorp/memberlist/logging.go @@ -0,0 +1,22 @@ +package memberlist + +import ( + "fmt" + "net" +) + +func LogAddress(addr net.Addr) string { + if addr == nil { + return "from=" + } + + return fmt.Sprintf("from=%s", addr.String()) +} + +func LogConn(conn net.Conn) string { + if conn == nil { + return LogAddress(nil) + } + + return LogAddress(conn.RemoteAddr()) +} diff --git a/vendor/src/github.com/hashicorp/memberlist/memberlist.go b/vendor/src/github.com/hashicorp/memberlist/memberlist.go index 01807cfe43..593fc17265 100644 --- a/vendor/src/github.com/hashicorp/memberlist/memberlist.go +++ b/vendor/src/github.com/hashicorp/memberlist/memberlist.go @@ -20,11 +20,19 @@ import ( "net" "os" "strconv" + "strings" "sync" "time" + + "github.com/hashicorp/go-multierror" + "github.com/miekg/dns" ) type Memberlist struct { + sequenceNum uint32 // Local sequence number + incarnation uint32 // Local incarnation number + numNodes uint32 // Number of known nodes (estimate) + config *Config shutdown bool shutdownCh chan struct{} @@ -35,9 +43,6 @@ type Memberlist struct { tcpListener *net.TCPListener handoff chan msgHandoff - sequenceNum uint32 // Local sequence number - incarnation uint32 // Local incarnation number - nodeLock sync.RWMutex nodes []*nodeState // Known nodes nodeMap map[string]*nodeState // Maps Addr.String() -> NodeState @@ -52,8 +57,6 @@ type Memberlist struct { broadcasts *TransmitLimitedQueue - startStopLock sync.Mutex - logger *log.Logger } @@ -90,6 +93,9 @@ func newMemberlist(conf *Config) (*Memberlist, error) { if err != nil { return nil, fmt.Errorf("Failed to start TCP listener. Err: %s", err) } + if conf.BindPort == 0 { + conf.BindPort = tcpLn.Addr().(*net.TCPAddr).Port + } udpAddr := &net.UDPAddr{IP: net.ParseIP(conf.BindAddr), Port: conf.BindPort} udpLn, err := net.ListenUDP("udp", udpAddr) @@ -101,10 +107,19 @@ func newMemberlist(conf *Config) (*Memberlist, error) { // Set the UDP receive window size setUDPRecvBuf(udpLn) - if conf.LogOutput == nil { - conf.LogOutput = os.Stderr + if conf.LogOutput != nil && conf.Logger != nil { + return nil, fmt.Errorf("Cannot specify both LogOutput and Logger. Please choose a single log configuration setting.") + } + + logDest := conf.LogOutput + if logDest == nil { + logDest = os.Stderr + } + + logger := conf.Logger + if logger == nil { + logger = log.New(logDest, "", log.LstdFlags) } - logger := log.New(conf.LogOutput, "", log.LstdFlags) m := &Memberlist{ config: conf, @@ -118,7 +133,9 @@ func newMemberlist(conf *Config) (*Memberlist, error) { broadcasts: &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult}, logger: logger, } - m.broadcasts.NumNodes = func() int { return len(m.nodes) } + m.broadcasts.NumNodes = func() int { + return m.estNumNodes() + } go m.tcpListen() go m.udpListen() go m.udpHandler() @@ -153,79 +170,158 @@ func Create(conf *Config) (*Memberlist, error) { // none could be reached. 
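A hedged sketch of bootstrapping a member and joining a cluster, tying together Create, Join, and the address resolution described below; the seed addresses are assumptions.

```go
package main

import (
	"log"

	"github.com/hashicorp/memberlist"
)

func main() {
	// Create starts the listeners and background work but contacts no one yet.
	list, err := memberlist.Create(memberlist.DefaultLANConfig())
	if err != nil {
		log.Fatalf("create: %v", err)
	}

	// Join resolves each entry (an IP, or a DNS name via the TCP-first lookup
	// below) and push/pulls state with every address found. On failure the
	// returned error aggregates the per-address errors.
	n, err := list.Join([]string{"10.0.0.10:7946", "seed.example.com:7946"})
	if err != nil {
		log.Printf("join: %v", err)
	}
	log.Printf("contacted %d nodes; cluster now has %d members", n, list.NumMembers())
}
```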
If an error is returned, the node did not successfully // join the cluster. func (m *Memberlist) Join(existing []string) (int, error) { - // Attempt to join any of them numSuccess := 0 - var retErr error + var errs error for _, exist := range existing { - addrs, port, err := m.resolveAddr(exist) + addrs, err := m.resolveAddr(exist) if err != nil { - m.logger.Printf("[WARN] memberlist: Failed to resolve %s: %v", exist, err) - retErr = err + err = fmt.Errorf("Failed to resolve %s: %v", exist, err) + errs = multierror.Append(errs, err) + m.logger.Printf("[WARN] memberlist: %v", err) continue } for _, addr := range addrs { - if err := m.pushPullNode(addr, port, true); err != nil { - retErr = err + if err := m.pushPullNode(addr.ip, addr.port, true); err != nil { + err = fmt.Errorf("Failed to join %s: %v", addr.ip, err) + errs = multierror.Append(errs, err) + m.logger.Printf("[DEBUG] memberlist: %v", err) continue } numSuccess++ } } - if numSuccess > 0 { - retErr = nil + errs = nil + } + return numSuccess, errs +} + +// ipPort holds information about a node we want to try to join. +type ipPort struct { + ip net.IP + port uint16 +} + +// tcpLookupIP is a helper to initiate a TCP-based DNS lookup for the given host. +// The built-in Go resolver will do a UDP lookup first, and will only use TCP if +// the response has the truncate bit set, which isn't common on DNS servers like +// Consul's. By doing the TCP lookup directly, we get the best chance for the +// largest list of hosts to join. Since joins are relatively rare events, it's ok +// to do this rather expensive operation. +func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, error) { + // Don't attempt any TCP lookups against non-fully qualified domain + // names, since those will likely come from the resolv.conf file. + if !strings.Contains(host, ".") { + return nil, nil } - return numSuccess, retErr + // Make sure the domain name is terminated with a dot (we know there's + // at least one character at this point). + dn := host + if dn[len(dn)-1] != '.' { + dn = dn + "." + } + + // See if we can find a server to try. + cc, err := dns.ClientConfigFromFile(m.config.DNSConfigPath) + if err != nil { + return nil, err + } + if len(cc.Servers) > 0 { + // We support host:port in the DNS config, but need to add the + // default port if one is not supplied. + server := cc.Servers[0] + if !hasPort(server) { + server = net.JoinHostPort(server, cc.Port) + } + + // Do the lookup. + c := new(dns.Client) + c.Net = "tcp" + msg := new(dns.Msg) + msg.SetQuestion(dn, dns.TypeANY) + in, _, err := c.Exchange(msg, server) + if err != nil { + return nil, err + } + + // Handle any IPs we get back that we can attempt to join. + var ips []ipPort + for _, r := range in.Answer { + switch rr := r.(type) { + case (*dns.A): + ips = append(ips, ipPort{rr.A, defaultPort}) + case (*dns.AAAA): + ips = append(ips, ipPort{rr.AAAA, defaultPort}) + case (*dns.CNAME): + m.logger.Printf("[DEBUG] memberlist: Ignoring CNAME RR in TCP-first answer for '%s'", host) + } + } + return ips, nil + } + + return nil, nil } // resolveAddr is used to resolve the address into an address, // port, and error. If no port is given, use the default -func (m *Memberlist) resolveAddr(hostStr string) ([][]byte, uint16, error) { - ips := make([][]byte, 0) +func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) { + // Normalize the incoming string to host:port so we can apply Go's + // parser to it. 
port := uint16(0) + if !hasPort(hostStr) { + hostStr += ":" + strconv.Itoa(m.config.BindPort) + } host, sport, err := net.SplitHostPort(hostStr) - if ae, ok := err.(*net.AddrError); ok && ae.Err == "missing port in address" { - // error, port missing - we can solve this - port = uint16(m.config.BindPort) - host = hostStr - } else if err != nil { - // error, but not missing port - return ips, port, err - } else if lport, err := strconv.ParseUint(sport, 10, 16); err != nil { - // error, when parsing port - return ips, port, err - } else { - // no error - port = uint16(lport) + if err != nil { + return nil, err } - // Get the addresses that hostPort might resolve to - // ResolveTcpAddr requres ipv6 brackets to separate - // port numbers whereas ParseIP doesn't, but luckily - // SplitHostPort takes care of the brackets - if ip := net.ParseIP(host); ip == nil { - if pre, err := net.LookupIP(host); err == nil { - for _, ip := range pre { - ips = append(ips, ip) - } - } else { - return ips, port, err - } - } else { - ips = append(ips, ip) + // This will capture the supplied port, or the default one added above. + lport, err := strconv.ParseUint(sport, 10, 16) + if err != nil { + return nil, err + } + port = uint16(lport) + + // If it looks like an IP address we are done. The SplitHostPort() above + // will make sure the host part is in good shape for parsing, even for + // IPv6 addresses. + if ip := net.ParseIP(host); ip != nil { + return []ipPort{ipPort{ip, port}}, nil } - return ips, port, nil + // First try TCP so we have the best chance for the largest list of + // hosts to join. If this fails it's not fatal since this isn't a standard + // way to query DNS, and we have a fallback below. + ips, err := m.tcpLookupIP(host, port) + if err != nil { + m.logger.Printf("[DEBUG] memberlist: TCP-first lookup failed for '%s', falling back to UDP: %s", hostStr, err) + } + if len(ips) > 0 { + return ips, nil + } + + // If TCP didn't yield anything then use the normal Go resolver which + // will try UDP, then might possibly try TCP again if the UDP response + // indicates it was truncated. + ans, err := net.LookupIP(host) + if err != nil { + return nil, err + } + ips = make([]ipPort, 0, len(ans)) + for _, ip := range ans { + ips = append(ips, ipPort{ip, port}) + } + return ips, nil } // setAlive is used to mark this node as being alive. This is the same // as if we received an alive notification our own network channel for // ourself. func (m *Memberlist) setAlive() error { - var advertiseAddr []byte var advertisePort int if m.config.AdvertiseAddr != "" { @@ -268,7 +364,7 @@ func (m *Memberlist) setAlive() error { if ip.To4() == nil { continue } - if !isPrivateIP(ip.String()) { + if !IsPrivateIP(ip.String()) { continue } @@ -286,12 +382,14 @@ func (m *Memberlist) setAlive() error { addr := m.tcpListener.Addr().(*net.TCPAddr) advertiseAddr = addr.IP } - advertisePort = m.config.BindPort + + // Use the port we are bound to. 
+ advertisePort = m.tcpListener.Addr().(*net.TCPAddr).Port } // Check if this is a public address without encryption addrStr := net.IP(advertiseAddr).String() - if !isPrivateIP(addrStr) && !isLoopbackIP(addrStr) && !m.config.EncryptionEnabled() { + if !IsPrivateIP(addrStr) && !isLoopbackIP(addrStr) && !m.config.EncryptionEnabled() { m.logger.Printf("[WARN] memberlist: Binding to public address without encryption!") } @@ -385,7 +483,8 @@ func (m *Memberlist) UpdateNode(timeout time.Duration) error { // user-data message, which a delegate will receive through NotifyMsg // The actual data is transmitted over UDP, which means this is a // best-effort transmission mechanism, and the maximum size of the -// message is the size of a single UDP datagram, after compression +// message is the size of a single UDP datagram, after compression. +// This method is DEPRECATED in favor or SendToUDP func (m *Memberlist) SendTo(to net.Addr, msg []byte) error { // Encode as a user message buf := make([]byte, 1, len(msg)+1) @@ -393,7 +492,36 @@ func (m *Memberlist) SendTo(to net.Addr, msg []byte) error { buf = append(buf, msg...) // Send the message - return m.rawSendMsg(to, buf) + return m.rawSendMsgUDP(to, buf) +} + +// SendToUDP is used to directly send a message to another node, without +// the use of the gossip mechanism. This will encode the message as a +// user-data message, which a delegate will receive through NotifyMsg +// The actual data is transmitted over UDP, which means this is a +// best-effort transmission mechanism, and the maximum size of the +// message is the size of a single UDP datagram, after compression +func (m *Memberlist) SendToUDP(to *Node, msg []byte) error { + // Encode as a user message + buf := make([]byte, 1, len(msg)+1) + buf[0] = byte(userMsg) + buf = append(buf, msg...) + + // Send the message + destAddr := &net.UDPAddr{IP: to.Addr, Port: int(to.Port)} + return m.rawSendMsgUDP(destAddr, buf) +} + +// SendToTCP is used to directly send a message to another node, without +// the use of the gossip mechanism. This will encode the message as a +// user-data message, which a delegate will receive through NotifyMsg +// The actual data is transmitted over TCP, which means delivery +// is guaranteed if no error is returned. There is no limit +// to the size of the message +func (m *Memberlist) SendToTCP(to *Node, msg []byte) error { + // Send the message + destAddr := &net.TCPAddr{IP: to.Addr, Port: int(to.Port)} + return m.sendTCPUserMsg(destAddr, msg) } // Members returns a list of all known live nodes. The node structures @@ -441,10 +569,12 @@ func (m *Memberlist) NumMembers() (alive int) { // This method is safe to call multiple times, but must not be called // after the cluster is already shut down. func (m *Memberlist) Leave(timeout time.Duration) error { - m.startStopLock.Lock() - defer m.startStopLock.Unlock() + m.nodeLock.Lock() + // We can't defer m.nodeLock.Unlock() because m.deadNode will also try to + // acquire a lock so we need to Unlock before that. 
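// Illustrative sketch (not part of the vendored source): sending a user message to a
// single peer with the new SendToUDP/SendToTCP helpers above, rather than the
// deprecated SendTo. The peer-by-name lookup is hypothetical; the receiving node sees
// the bytes through its Delegate's NotifyMsg.
package memberlistexample

import (
	"fmt"

	"github.com/hashicorp/memberlist"
)

func notifyPeer(m *memberlist.Memberlist, peerName string, payload []byte) error {
	for _, node := range m.Members() {
		if node.Name != peerName {
			continue
		}
		// Best-effort, single-UDP-datagram delivery.
		if err := m.SendToUDP(node, payload); err == nil {
			return nil
		}
		// Fall back to TCP when the payload must arrive: delivery is
		// guaranteed if no error is returned and there is no size limit.
		return m.SendToTCP(node, payload)
	}
	return fmt.Errorf("peer %q is not a live member", peerName)
}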
if m.shutdown { + m.nodeLock.Unlock() panic("leave after shutdown") } @@ -452,6 +582,7 @@ func (m *Memberlist) Leave(timeout time.Duration) error { m.leave = true state, ok := m.nodeMap[m.config.Name] + m.nodeLock.Unlock() if !ok { m.logger.Printf("[WARN] memberlist: Leave but we're not in the node map.") return nil @@ -475,6 +606,8 @@ func (m *Memberlist) Leave(timeout time.Duration) error { return fmt.Errorf("timeout waiting for leave broadcast") } } + } else { + m.nodeLock.Unlock() } return nil @@ -509,8 +642,8 @@ func (m *Memberlist) ProtocolVersion() uint8 { // // This method is safe to call multiple times. func (m *Memberlist) Shutdown() error { - m.startStopLock.Lock() - defer m.startStopLock.Unlock() + m.nodeLock.Lock() + defer m.nodeLock.Unlock() if m.shutdown { return nil diff --git a/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go b/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go index dc27250021..89afb59f20 100644 --- a/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go +++ b/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go @@ -8,6 +8,7 @@ package memberlist // as part of the push-pull anti-entropy. type MergeDelegate interface { // NotifyMerge is invoked when a merge could take place. - // Provides a list of the nodes known by the peer. - NotifyMerge(peers []*Node) (cancel bool) + // Provides a list of the nodes known by the peer. If + // the return value is non-nil, the merge is canceled. + NotifyMerge(peers []*Node) error } diff --git a/vendor/src/github.com/hashicorp/memberlist/net.go b/vendor/src/github.com/hashicorp/memberlist/net.go index 4e8319c6e8..b92dccb101 100644 --- a/vendor/src/github.com/hashicorp/memberlist/net.go +++ b/vendor/src/github.com/hashicorp/memberlist/net.go @@ -18,7 +18,15 @@ import ( // range. This range is inclusive. const ( ProtocolVersionMin uint8 = 1 - ProtocolVersionMax = 2 + + // Version 3 added support for TCP pings but we kept the default + // protocol version at 2 to ease transition to this new feature. + // A memberlist speaking version 2 of the protocol will attempt + // to TCP ping another memberlist who understands version 3 or + // greater. 
+ ProtocolVersion2Compatible = 2 + + ProtocolVersionMax = 3 ) // messageType is an integer ID of a type of message that can be received @@ -79,7 +87,8 @@ type indirectPingReq struct { // ack response is sent for a ping type ackResp struct { - SeqNo uint32 + SeqNo uint32 + Payload []byte } // suspect is broadcast when we suspect a node is dead @@ -119,6 +128,11 @@ type pushPullHeader struct { Join bool // Is this a join request or a anti-entropy run } +// userMsgHeader is used to encapsulate a userMsg +type userMsgHeader struct { + UserMsgLen int // Encodes the byte lengh of user state +} + // pushNodeState is used for pushPullReq when we are // transfering out node states type pushNodeState struct { @@ -185,54 +199,65 @@ func (m *Memberlist) tcpListen() { // handleConn handles a single incoming TCP connection func (m *Memberlist) handleConn(conn *net.TCPConn) { - m.logger.Printf("[DEBUG] memberlist: Responding to push/pull sync with: %s", conn.RemoteAddr()) + m.logger.Printf("[DEBUG] memberlist: TCP connection %s", LogConn(conn)) + defer conn.Close() metrics.IncrCounter([]string{"memberlist", "tcp", "accept"}, 1) - join, remoteNodes, userState, err := m.readRemoteState(conn) + conn.SetDeadline(time.Now().Add(m.config.TCPTimeout)) + msgType, bufConn, dec, err := m.readTCP(conn) if err != nil { - m.logger.Printf("[ERR] memberlist: Failed to receive remote state: %s", err) + m.logger.Printf("[ERR] memberlist: failed to receive: %s %s", err, LogConn(conn)) return } - if err := m.sendLocalState(conn, join); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to push local state: %s", err) - } - - if err := m.verifyProtocol(remoteNodes); err != nil { - m.logger.Printf("[ERR] memberlist: Push/pull verification failed: %s", err) - return - } - - // Invoke the merge delegate if any - if join && m.config.Merge != nil { - nodes := make([]*Node, len(remoteNodes)) - for idx, n := range remoteNodes { - nodes[idx] = &Node{ - Name: n.Name, - Addr: n.Addr, - Port: n.Port, - Meta: n.Meta, - PMin: n.Vsn[0], - PMax: n.Vsn[1], - PCur: n.Vsn[2], - DMin: n.Vsn[3], - DMax: n.Vsn[4], - DCur: n.Vsn[5], - } + switch msgType { + case userMsg: + if err := m.readUserMsg(bufConn, dec); err != nil { + m.logger.Printf("[ERR] memberlist: Failed to receive user message: %s %s", err, LogConn(conn)) } - if m.config.Merge.NotifyMerge(nodes) { - m.logger.Printf("[WARN] memberlist: Cluster merge canceled") + case pushPullMsg: + join, remoteNodes, userState, err := m.readRemoteState(bufConn, dec) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed to read remote state: %s %s", err, LogConn(conn)) return } - } - // Merge the membership state - m.mergeState(remoteNodes) + if err := m.sendLocalState(conn, join); err != nil { + m.logger.Printf("[ERR] memberlist: Failed to push local state: %s %s", err, LogConn(conn)) + return + } - // Invoke the delegate for user state - if m.config.Delegate != nil { - m.config.Delegate.MergeRemoteState(userState, join) + if err := m.mergeRemoteState(join, remoteNodes, userState); err != nil { + m.logger.Printf("[ERR] memberlist: Failed push/pull merge: %s %s", err, LogConn(conn)) + return + } + case pingMsg: + var p ping + if err := dec.Decode(&p); err != nil { + m.logger.Printf("[ERR] memberlist: Failed to decode TCP ping: %s %s", err, LogConn(conn)) + return + } + + if p.Node != "" && p.Node != m.config.Name { + m.logger.Printf("[WARN] memberlist: Got ping for unexpected node %s %s", p.Node, LogConn(conn)) + return + } + + ack := ackResp{p.SeqNo, nil} + out, err := 
encode(ackRespMsg, &ack) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed to encode TCP ack: %s", err) + return + } + + err = m.rawSendMsgTCP(conn, out.Bytes()) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed to send TCP ack: %s %s", err, LogConn(conn)) + return + } + default: + m.logger.Printf("[ERR] memberlist: Received invalid msgType (%d) %s", msgType, LogConn(conn)) } } @@ -265,29 +290,30 @@ func (m *Memberlist) udpListen() { continue } + // Capture the reception time of the packet as close to the + // system calls as possible. + lastPacket = time.Now() + // Check the length if n < 1 { - m.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes). From: %s", - len(buf), addr) + m.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes) %s", + len(buf), LogAddress(addr)) continue } - // Capture the current time - lastPacket = time.Now() - // Ingest this packet metrics.IncrCounter([]string{"memberlist", "udp", "received"}, float32(n)) - m.ingestPacket(buf[:n], addr) + m.ingestPacket(buf[:n], addr, lastPacket) } } -func (m *Memberlist) ingestPacket(buf []byte, from net.Addr) { +func (m *Memberlist) ingestPacket(buf []byte, from net.Addr, timestamp time.Time) { // Check if encryption is enabled if m.config.EncryptionEnabled() { // Decrypt the payload plain, err := decryptPayload(m.config.Keyring.GetKeys(), buf, nil) if err != nil { - m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v", err) + m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v %s", err, LogAddress(from)) return } @@ -296,10 +322,10 @@ func (m *Memberlist) ingestPacket(buf []byte, from net.Addr) { } // Handle the command - m.handleCommand(buf, from) + m.handleCommand(buf, from, timestamp) } -func (m *Memberlist) handleCommand(buf []byte, from net.Addr) { +func (m *Memberlist) handleCommand(buf []byte, from net.Addr, timestamp time.Time) { // Decode the message type msgType := messageType(buf[0]) buf = buf[1:] @@ -307,16 +333,16 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr) { // Switch on the msgType switch msgType { case compoundMsg: - m.handleCompound(buf, from) + m.handleCompound(buf, from, timestamp) case compressMsg: - m.handleCompressed(buf, from) + m.handleCompressed(buf, from, timestamp) case pingMsg: m.handlePing(buf, from) case indirectPingMsg: m.handleIndirectPing(buf, from) case ackRespMsg: - m.handleAck(buf, from) + m.handleAck(buf, from, timestamp) case suspectMsg: fallthrough @@ -328,11 +354,11 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr) { select { case m.handoff <- msgHandoff{msgType, buf, from}: default: - m.logger.Printf("[WARN] memberlist: UDP handler queue full, dropping message (%d)", msgType) + m.logger.Printf("[WARN] memberlist: UDP handler queue full, dropping message (%d) %s", msgType, LogAddress(from)) } default: - m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported. From: %s", msgType, from) + m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported %s", msgType, LogAddress(from)) } } @@ -357,7 +383,7 @@ func (m *Memberlist) udpHandler() { case userMsg: m.handleUser(buf, from) default: - m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported. 
From: %s (handler)", msgType, from) + m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported %s (handler)", msgType, LogAddress(from)) } case <-m.shutdownCh: @@ -366,46 +392,50 @@ func (m *Memberlist) udpHandler() { } } -func (m *Memberlist) handleCompound(buf []byte, from net.Addr) { +func (m *Memberlist) handleCompound(buf []byte, from net.Addr, timestamp time.Time) { // Decode the parts trunc, parts, err := decodeCompoundMessage(buf) if err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode compound request: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode compound request: %s %s", err, LogAddress(from)) return } // Log any truncation if trunc > 0 { - m.logger.Printf("[WARN] memberlist: Compound request had %d truncated messages", trunc) + m.logger.Printf("[WARN] memberlist: Compound request had %d truncated messages %s", trunc, LogAddress(from)) } // Handle each message for _, part := range parts { - m.handleCommand(part, from) + m.handleCommand(part, from, timestamp) } } func (m *Memberlist) handlePing(buf []byte, from net.Addr) { var p ping if err := decode(buf, &p); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode ping request: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode ping request: %s %s", err, LogAddress(from)) return } // If node is provided, verify that it is for us if p.Node != "" && p.Node != m.config.Name { - m.logger.Printf("[WARN] memberlist: Got ping for unexpected node '%s'", p.Node) + m.logger.Printf("[WARN] memberlist: Got ping for unexpected node '%s' %s", p.Node, LogAddress(from)) return } - ack := ackResp{p.SeqNo} + var ack ackResp + ack.SeqNo = p.SeqNo + if m.config.Ping != nil { + ack.Payload = m.config.Ping.AckPayload() + } if err := m.encodeAndSendMsg(from, ackRespMsg, &ack); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to send ack: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to send ack: %s %s", err, LogAddress(from)) } } func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) { var ind indirectPingReq if err := decode(buf, &ind); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode indirect ping request: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode indirect ping request: %s %s", err, LogAddress(from)) return } @@ -421,33 +451,33 @@ func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) { destAddr := &net.UDPAddr{IP: ind.Target, Port: int(ind.Port)} // Setup a response handler to relay the ack - respHandler := func() { - ack := ackResp{ind.SeqNo} + respHandler := func(payload []byte, timestamp time.Time) { + ack := ackResp{ind.SeqNo, nil} if err := m.encodeAndSendMsg(from, ackRespMsg, &ack); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s %s", err, LogAddress(from)) } } m.setAckHandler(localSeqNo, respHandler, m.config.ProbeTimeout) // Send the ping if err := m.encodeAndSendMsg(destAddr, pingMsg, &ping); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to send ping: %s %s", err, LogAddress(from)) } } -func (m *Memberlist) handleAck(buf []byte, from net.Addr) { +func (m *Memberlist) handleAck(buf []byte, from net.Addr, timestamp time.Time) { var ack ackResp if err := decode(buf, &ack); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode ack response: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode ack 
response: %s %s", err, LogAddress(from)) return } - m.invokeAckHandler(ack.SeqNo) + m.invokeAckHandler(ack, timestamp) } func (m *Memberlist) handleSuspect(buf []byte, from net.Addr) { var sus suspect if err := decode(buf, &sus); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode suspect message: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode suspect message: %s %s", err, LogAddress(from)) return } m.suspectNode(&sus) @@ -456,7 +486,7 @@ func (m *Memberlist) handleSuspect(buf []byte, from net.Addr) { func (m *Memberlist) handleAlive(buf []byte, from net.Addr) { var live alive if err := decode(buf, &live); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode alive message: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode alive message: %s %s", err, LogAddress(from)) return } @@ -472,7 +502,7 @@ func (m *Memberlist) handleAlive(buf []byte, from net.Addr) { func (m *Memberlist) handleDead(buf []byte, from net.Addr) { var d dead if err := decode(buf, &d); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode dead message: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode dead message: %s %s", err, LogAddress(from)) return } m.deadNode(&d) @@ -487,16 +517,16 @@ func (m *Memberlist) handleUser(buf []byte, from net.Addr) { } // handleCompressed is used to unpack a compressed message -func (m *Memberlist) handleCompressed(buf []byte, from net.Addr) { +func (m *Memberlist) handleCompressed(buf []byte, from net.Addr, timestamp time.Time) { // Try to decode the payload payload, err := decompressPayload(buf) if err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decompress payload: %v", err) + m.logger.Printf("[ERR] memberlist: Failed to decompress payload: %v %s", err, LogAddress(from)) return } // Recursively handle the payload - m.handleCommand(payload, from) + m.handleCommand(payload, from, timestamp) } // encodeAndSendMsg is used to combine the encoding and sending steps @@ -523,7 +553,7 @@ func (m *Memberlist) sendMsg(to net.Addr, msg []byte) error { // Fast path if nothing to piggypack if len(extra) == 0 { - return m.rawSendMsg(to, msg) + return m.rawSendMsgUDP(to, msg) } // Join all the messages @@ -535,11 +565,11 @@ func (m *Memberlist) sendMsg(to net.Addr, msg []byte) error { compound := makeCompoundMessage(msgs) // Send the message - return m.rawSendMsg(to, compound.Bytes()) + return m.rawSendMsgUDP(to, compound.Bytes()) } -// rawSendMsg is used to send a UDP message to another host without modification -func (m *Memberlist) rawSendMsg(to net.Addr, msg []byte) error { +// rawSendMsgUDP is used to send a UDP message to another host without modification +func (m *Memberlist) rawSendMsgUDP(to net.Addr, msg []byte) error { // Check if we have compression enabled if m.config.EnableCompression { buf, err := compressPayload(msg) @@ -571,7 +601,72 @@ func (m *Memberlist) rawSendMsg(to net.Addr, msg []byte) error { return err } -// sendState is used to initiate a push/pull over TCP with a remote node +// rawSendMsgTCP is used to send a TCP message to another host without modification +func (m *Memberlist) rawSendMsgTCP(conn net.Conn, sendBuf []byte) error { + // Check if compresion is enabled + if m.config.EnableCompression { + compBuf, err := compressPayload(sendBuf) + if err != nil { + m.logger.Printf("[ERROR] memberlist: Failed to compress payload: %v", err) + } else { + sendBuf = compBuf.Bytes() + } + } + + // Check if encryption is enabled + if m.config.EncryptionEnabled() { + crypt, err := 
m.encryptLocalState(sendBuf) + if err != nil { + m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", err) + return err + } + sendBuf = crypt + } + + // Write out the entire send buffer + metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf))) + + if n, err := conn.Write(sendBuf); err != nil { + return err + } else if n != len(sendBuf) { + return fmt.Errorf("only %d of %d bytes written", n, len(sendBuf)) + } + + return nil +} + +// sendTCPUserMsg is used to send a TCP userMsg to another host +func (m *Memberlist) sendTCPUserMsg(to net.Addr, sendBuf []byte) error { + dialer := net.Dialer{Timeout: m.config.TCPTimeout} + conn, err := dialer.Dial("tcp", to.String()) + if err != nil { + return err + } + defer conn.Close() + + bufConn := bytes.NewBuffer(nil) + + if err := bufConn.WriteByte(byte(userMsg)); err != nil { + return err + } + + // Send our node state + header := userMsgHeader{UserMsgLen: len(sendBuf)} + hd := codec.MsgpackHandle{} + enc := codec.NewEncoder(bufConn, &hd) + + if err := enc.Encode(&header); err != nil { + return err + } + + if _, err := bufConn.Write(sendBuf); err != nil { + return err + } + + return m.rawSendMsgTCP(conn, bufConn.Bytes()) +} + +// sendAndReceiveState is used to initiate a push/pull over TCP with a remote node func (m *Memberlist) sendAndReceiveState(addr []byte, port uint16, join bool) ([]pushNodeState, []byte, error) { // Attempt to connect dialer := net.Dialer{Timeout: m.config.TCPTimeout} @@ -589,15 +684,21 @@ func (m *Memberlist) sendAndReceiveState(addr []byte, port uint16, join bool) ([ return nil, nil, err } - // Read remote state - _, remote, userState, err := m.readRemoteState(conn) + conn.SetDeadline(time.Now().Add(m.config.TCPTimeout)) + msgType, bufConn, dec, err := m.readTCP(conn) if err != nil { - err := fmt.Errorf("Reading remote state failed: %v", err) return nil, nil, err } - // Return the remote state - return remote, userState, nil + // Quit if not push/pull + if msgType != pushPullMsg { + err := fmt.Errorf("received invalid msgType (%d), expected pushPullMsg (%d) %s", msgType, pushPullMsg, LogConn(conn)) + return nil, nil, err + } + + // Read remote state + _, remoteNodes, userState, err := m.readRemoteState(bufConn, dec) + return remoteNodes, userState, err } // sendLocalState is invoked to send our local state over a tcp connection @@ -658,34 +759,7 @@ func (m *Memberlist) sendLocalState(conn net.Conn, join bool) error { } // Get the send buffer - sendBuf := bufConn.Bytes() - - // Check if compresion is enabled - if m.config.EnableCompression { - compBuf, err := compressPayload(bufConn.Bytes()) - if err != nil { - m.logger.Printf("[ERROR] memberlist: Failed to compress local state: %v", err) - } else { - sendBuf = compBuf.Bytes() - } - } - - // Check if encryption is enabled - if m.config.EncryptionEnabled() { - crypt, err := m.encryptLocalState(sendBuf) - if err != nil { - m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", err) - return err - } - sendBuf = crypt - } - - // Write out the entire send buffer - metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf))) - if _, err := conn.Write(sendBuf); err != nil { - return err - } - return nil + return m.rawSendMsgTCP(conn, bufConn.Bytes()) } // encryptLocalState is used to help encrypt local state before sending @@ -743,38 +817,36 @@ func (m *Memberlist) decryptRemoteState(bufConn io.Reader) ([]byte, error) { return decryptPayload(keys, cipherBytes, dataBytes) } -// recvRemoteState is used to 
read the remote state from a connection -func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []byte, error) { - // Setup a deadline - conn.SetDeadline(time.Now().Add(m.config.TCPTimeout)) - +// readTCP is used to read the start of a TCP stream. +// it decrypts and decompresses the stream if necessary +func (m *Memberlist) readTCP(conn net.Conn) (messageType, io.Reader, *codec.Decoder, error) { // Created a buffered reader var bufConn io.Reader = bufio.NewReader(conn) // Read the message type buf := [1]byte{0} if _, err := bufConn.Read(buf[:]); err != nil { - return false, nil, nil, err + return 0, nil, nil, err } msgType := messageType(buf[0]) // Check if the message is encrypted if msgType == encryptMsg { if !m.config.EncryptionEnabled() { - return false, nil, nil, + return 0, nil, nil, fmt.Errorf("Remote state is encrypted and encryption is not configured") } plain, err := m.decryptRemoteState(bufConn) if err != nil { - return false, nil, nil, err + return 0, nil, nil, err } // Reset message type and bufConn msgType = messageType(plain[0]) bufConn = bytes.NewReader(plain[1:]) } else if m.config.EncryptionEnabled() { - return false, nil, nil, + return 0, nil, nil, fmt.Errorf("Encryption is configured but remote state is not encrypted") } @@ -786,11 +858,11 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by if msgType == compressMsg { var c compress if err := dec.Decode(&c); err != nil { - return false, nil, nil, err + return 0, nil, nil, err } decomp, err := decompressBuffer(&c) if err != nil { - return false, nil, nil, err + return 0, nil, nil, err } // Reset the message type @@ -803,12 +875,11 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by dec = codec.NewDecoder(bufConn, &hd) } - // Quit if not push/pull - if msgType != pushPullMsg { - err := fmt.Errorf("received invalid msgType (%d)", msgType) - return false, nil, nil, err - } + return msgType, bufConn, dec, nil +} +// readRemoteState is used to read the remote state from a connection +func (m *Memberlist) readRemoteState(bufConn io.Reader, dec *codec.Decoder) (bool, []pushNodeState, []byte, error) { // Read the push/pull header var header pushPullHeader if err := dec.Decode(&header); err != nil { @@ -821,7 +892,7 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by // Try to decode all the states for i := 0; i < header.Nodes; i++ { if err := dec.Decode(&remoteNodes[i]); err != nil { - return false, remoteNodes, nil, err + return false, nil, nil, err } } @@ -836,7 +907,7 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by bytes, header.UserStateLen) } if err != nil { - return false, remoteNodes, nil, err + return false, nil, nil, err } } @@ -850,3 +921,119 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by return header.Join, remoteNodes, userBuf, nil } + +// mergeRemoteState is used to merge the remote state with our local state +func (m *Memberlist) mergeRemoteState(join bool, remoteNodes []pushNodeState, userBuf []byte) error { + if err := m.verifyProtocol(remoteNodes); err != nil { + return err + } + + // Invoke the merge delegate if any + if join && m.config.Merge != nil { + nodes := make([]*Node, len(remoteNodes)) + for idx, n := range remoteNodes { + nodes[idx] = &Node{ + Name: n.Name, + Addr: n.Addr, + Port: n.Port, + Meta: n.Meta, + PMin: n.Vsn[0], + PMax: n.Vsn[1], + PCur: n.Vsn[2], + DMin: n.Vsn[3], + DMax: n.Vsn[4], + DCur: n.Vsn[5], + } 
+ } + if err := m.config.Merge.NotifyMerge(nodes); err != nil { + return err + } + } + + // Merge the membership state + m.mergeState(remoteNodes) + + // Invoke the delegate for user state + if userBuf != nil && m.config.Delegate != nil { + m.config.Delegate.MergeRemoteState(userBuf, join) + } + return nil +} + +// readUserMsg is used to decode a userMsg from a TCP stream +func (m *Memberlist) readUserMsg(bufConn io.Reader, dec *codec.Decoder) error { + // Read the user message header + var header userMsgHeader + if err := dec.Decode(&header); err != nil { + return err + } + + // Read the user message into a buffer + var userBuf []byte + if header.UserMsgLen > 0 { + userBuf = make([]byte, header.UserMsgLen) + bytes, err := io.ReadAtLeast(bufConn, userBuf, header.UserMsgLen) + if err == nil && bytes != header.UserMsgLen { + err = fmt.Errorf( + "Failed to read full user message (%d / %d)", + bytes, header.UserMsgLen) + } + if err != nil { + return err + } + + d := m.config.Delegate + if d != nil { + d.NotifyMsg(userBuf) + } + } + + return nil +} + +// sendPingAndWaitForAck makes a TCP connection to the given address, sends +// a ping, and waits for an ack. All of this is done as a series of blocking +// operations, given the deadline. The bool return parameter is true if we +// we able to round trip a ping to the other node. +func (m *Memberlist) sendPingAndWaitForAck(destAddr net.Addr, ping ping, deadline time.Time) (bool, error) { + dialer := net.Dialer{Deadline: deadline} + conn, err := dialer.Dial("tcp", destAddr.String()) + if err != nil { + // If the node is actually dead we expect this to fail, so we + // shouldn't spam the logs with it. After this point, errors + // with the connection are real, unexpected errors and should + // get propagated up. + return false, nil + } + defer conn.Close() + conn.SetDeadline(deadline) + + out, err := encode(pingMsg, &ping) + if err != nil { + return false, err + } + + if err = m.rawSendMsgTCP(conn, out.Bytes()); err != nil { + return false, err + } + + msgType, _, dec, err := m.readTCP(conn) + if err != nil { + return false, err + } + + if msgType != ackRespMsg { + return false, fmt.Errorf("Unexpected msgType (%d) from TCP ping %s", msgType, LogConn(conn)) + } + + var ack ackResp + if err = dec.Decode(&ack); err != nil { + return false, err + } + + if ack.SeqNo != ping.SeqNo { + return false, fmt.Errorf("Sequence number from ack (%d) doesn't match ping (%d) from TCP ping %s", ack.SeqNo, ping.SeqNo, LogConn(conn)) + } + + return true, nil +} diff --git a/vendor/src/github.com/hashicorp/memberlist/ping_delegate.go b/vendor/src/github.com/hashicorp/memberlist/ping_delegate.go new file mode 100644 index 0000000000..1566c8b3d5 --- /dev/null +++ b/vendor/src/github.com/hashicorp/memberlist/ping_delegate.go @@ -0,0 +1,14 @@ +package memberlist + +import "time" + +// PingDelegate is used to notify an observer how long it took for a ping message to +// complete a round trip. It can also be used for writing arbitrary byte slices +// into ack messages. Note that in order to be meaningful for RTT estimates, this +// delegate does not apply to indirect pings, nor fallback pings sent over TCP. 
+type PingDelegate interface { + // AckPayload is invoked when an ack is being sent; the returned bytes will be appended to the ack + AckPayload() []byte + // NotifyPing is invoked when an ack for a ping is received + NotifyPingComplete(other *Node, rtt time.Duration, payload []byte) +} diff --git a/vendor/src/github.com/hashicorp/memberlist/state.go b/vendor/src/github.com/hashicorp/memberlist/state.go index 3fc1d02e19..d0339bd158 100644 --- a/vendor/src/github.com/hashicorp/memberlist/state.go +++ b/vendor/src/github.com/hashicorp/memberlist/state.go @@ -44,10 +44,20 @@ type nodeState struct { // ackHandler is used to register handlers for incoming acks type ackHandler struct { - handler func() + handler func([]byte, time.Time) timer *time.Timer } +// NoPingResponseError is used to indicate a 'ping' packet was +// successfully issued but no response was received +type NoPingResponseError struct { + node string +} + +func (f NoPingResponseError) Error() string { + return fmt.Sprintf("No response from node %s", f.node) +} + // Schedule is used to ensure the Tick is performed periodically. This // function is safe to call multiple times. If the memberlist is already // scheduled, then it won't do anything. @@ -128,9 +138,7 @@ func (m *Memberlist) pushPullTrigger(stop <-chan struct{}) { // Tick using a dynamic timer for { - m.nodeLock.RLock() - tickTime := pushPullScale(interval, len(m.nodes)) - m.nodeLock.RUnlock() + tickTime := pushPullScale(interval, m.estNumNodes()) select { case <-time.After(tickTime): m.pushPull() @@ -207,46 +215,55 @@ START: m.probeNode(&node) } -// probeNode handles a single round of failure checking on a node +// probeNode handles a single round of failure checking on a node. func (m *Memberlist) probeNode(node *nodeState) { defer metrics.MeasureSince([]string{"memberlist", "probeNode"}, time.Now()) - // Send a ping to the node + // Prepare a ping message and setup an ack handler. ping := ping{SeqNo: m.nextSeqNo(), Node: node.Name} - destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)} - - // Setup an ack handler - ackCh := make(chan bool, m.config.IndirectChecks+1) + ackCh := make(chan ackMessage, m.config.IndirectChecks+1) m.setAckChannel(ping.SeqNo, ackCh, m.config.ProbeInterval) - // Send the ping message + // Send a ping to the node. + deadline := time.Now().Add(m.config.ProbeInterval) + destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)} if err := m.encodeAndSendMsg(destAddr, pingMsg, &ping); err != nil { m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err) return } - // Wait for response or round-trip-time + // Mark the sent time here, which should be after any pre-processing and + // system calls to do the actual send. This probably under-reports a bit, + // but it's the best we can do. + sent := time.Now() + + // Wait for response or round-trip-time. select { case v := <-ackCh: - if v == true { + if v.Complete == true { + if m.config.Ping != nil { + rtt := v.Timestamp.Sub(sent) + m.config.Ping.NotifyPingComplete(&node.Node, rtt, v.Payload) + } return } // As an edge case, if we get a timeout, we need to re-enqueue it - // here to break out of the select below - if v == false { + // here to break out of the select below. + if v.Complete == false { ackCh <- v } case <-time.After(m.config.ProbeTimeout): + m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %v (timeout reached)", node.Name) } - // Get some random live nodes + // Get some random live nodes. 
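// Illustrative sketch (not part of the vendored source): a minimal PingDelegate that
// stamps each ack with the responder's wall-clock time and logs the round-trip time
// measured by probeNode above. It is attached through the config's Ping field; the
// type name is hypothetical.
package memberlistexample

import (
	"log"
	"time"

	"github.com/hashicorp/memberlist"
)

type rttLogger struct{}

// AckPayload runs on the node answering a ping; the returned bytes ride back
// to the prober inside the ack.
func (rttLogger) AckPayload() []byte {
	return []byte(time.Now().UTC().Format(time.RFC3339Nano))
}

// NotifyPingComplete runs on the probing node once the ack arrives.
func (rttLogger) NotifyPingComplete(other *memberlist.Node, rtt time.Duration, payload []byte) {
	log.Printf("ping to %s took %v (peer clock: %s)", other.Name, rtt, payload)
}

func withRTTLogging(conf *memberlist.Config) *memberlist.Config {
	conf.Ping = rttLogger{}
	return conf
}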
m.nodeLock.RLock() excludes := []string{m.config.Name, node.Name} kNodes := kRandomNodes(m.config.IndirectChecks, excludes, m.nodes) m.nodeLock.RUnlock() - // Attempt an indirect ping + // Attempt an indirect ping. ind := indirectPingReq{SeqNo: ping.SeqNo, Target: node.Addr, Port: node.Port, Node: node.Name} for _, peer := range kNodes { destAddr := &net.UDPAddr{IP: peer.Addr, Port: int(peer.Port)} @@ -255,10 +272,49 @@ func (m *Memberlist) probeNode(node *nodeState) { } } - // Wait for the acks or timeout + // Also make an attempt to contact the node directly over TCP. This + // helps prevent confused clients who get isolated from UDP traffic + // but can still speak TCP (which also means they can possibly report + // misinformation to other nodes via anti-entropy), avoiding flapping in + // the cluster. + // + // This is a little unusual because we will attempt a TCP ping to any + // member who understands version 3 of the protocol, regardless of + // which protocol version we are speaking. That's why we've included a + // config option to turn this off if desired. + fallbackCh := make(chan bool, 1) + if (!m.config.DisableTcpPings) && (node.PMax >= 3) { + destAddr := &net.TCPAddr{IP: node.Addr, Port: int(node.Port)} + go func() { + defer close(fallbackCh) + didContact, err := m.sendPingAndWaitForAck(destAddr, ping, deadline) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed TCP fallback ping: %s", err) + } else { + fallbackCh <- didContact + } + }() + } else { + close(fallbackCh) + } + + // Wait for the acks or timeout. Note that we don't check the fallback + // channel here because we want to issue a warning below if that's the + // *only* way we hear back from the peer, so we have to let this time + // out first to allow the normal UDP-based acks to come in. select { case v := <-ackCh: - if v == true { + if v.Complete == true { + return + } + } + + // Finally, poll the fallback channel. The timeouts are set such that + // the channel will have something or be closed without having to wait + // any additional time here. + for didContact := range fallbackCh { + if didContact { + m.logger.Printf("[WARN] memberlist: Was able to reach %s via TCP but not UDP, network may be misconfigured and not allowing bidirectional UDP", node.Name) return } } @@ -269,6 +325,37 @@ func (m *Memberlist) probeNode(node *nodeState) { m.suspectNode(&s) } +// Ping initiates a ping to the node with the specified name. +func (m *Memberlist) Ping(node string, addr net.Addr) (time.Duration, error) { + // Prepare a ping message and setup an ack handler. + ping := ping{SeqNo: m.nextSeqNo(), Node: node} + ackCh := make(chan ackMessage, m.config.IndirectChecks+1) + m.setAckChannel(ping.SeqNo, ackCh, m.config.ProbeInterval) + + // Send a ping to the node. + if err := m.encodeAndSendMsg(addr, pingMsg, &ping); err != nil { + return 0, err + } + + // Mark the sent time here, which should be after any pre-processing and + // system calls to do the actual send. This probably under-reports a bit, + // but it's the best we can do. + sent := time.Now() + + // Wait for response or timeout. + select { + case v := <-ackCh: + if v.Complete == true { + return v.Timestamp.Sub(sent), nil + } + case <-time.After(m.config.ProbeTimeout): + // Timeout, return an error below. + } + + m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %v (timeout reached)", node) + return 0, NoPingResponseError{ping.Node} +} + // resetNodes is used when the tick wraps around. It will reap the // dead nodes and shuffle the node list. 
func (m *Memberlist) resetNodes() { @@ -287,6 +374,9 @@ func (m *Memberlist) resetNodes() { // Trim the nodes to exclude the dead nodes m.nodes = m.nodes[0:deadIdx] + // Update numNodes after we've trimmed the dead nodes + atomic.StoreUint32(&m.numNodes, uint32(deadIdx)) + // Shuffle live nodes shuffleNodes(m.nodes) } @@ -320,7 +410,7 @@ func (m *Memberlist) gossip() { // Send the compound message destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)} - if err := m.rawSendMsg(destAddr, compound.Bytes()); err != nil { + if err := m.rawSendMsgUDP(destAddr, compound.Bytes()); err != nil { m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", destAddr, err) } } @@ -359,40 +449,9 @@ func (m *Memberlist) pushPullNode(addr []byte, port uint16, join bool) error { return err } - if err := m.verifyProtocol(remote); err != nil { + if err := m.mergeRemoteState(join, remote, userState); err != nil { return err } - - // Invoke the merge delegate if any - if join && m.config.Merge != nil { - nodes := make([]*Node, len(remote)) - for idx, n := range remote { - nodes[idx] = &Node{ - Name: n.Name, - Addr: n.Addr, - Port: n.Port, - Meta: n.Meta, - PMin: n.Vsn[0], - PMax: n.Vsn[1], - PCur: n.Vsn[2], - DMin: n.Vsn[3], - DMax: n.Vsn[4], - DCur: n.Vsn[5], - } - } - if m.config.Merge.NotifyMerge(nodes) { - m.logger.Printf("[WARN] memberlist: Cluster merge canceled") - return fmt.Errorf("Merge canceled") - } - } - - // Merge the state - m.mergeState(remote) - - // Invoke the delegate - if m.config.Delegate != nil { - m.config.Delegate.MergeRemoteState(userState, join) - } return nil } @@ -525,14 +584,24 @@ func (m *Memberlist) nextIncarnation() uint32 { return atomic.AddUint32(&m.incarnation, 1) } -// setAckChannel is used to attach a channel to receive a message when -// an ack with a given sequence number is received. The channel gets sent -// false on timeout -func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Duration) { +// estNumNodes is used to get the current estimate of the number of nodes +func (m *Memberlist) estNumNodes() int { + return int(atomic.LoadUint32(&m.numNodes)) +} + +type ackMessage struct { + Complete bool + Payload []byte + Timestamp time.Time +} + +// setAckChannel is used to attach a channel to receive a message when an ack with a given +// sequence number is received. The `complete` field of the message will be false on timeout +func (m *Memberlist) setAckChannel(seqNo uint32, ch chan ackMessage, timeout time.Duration) { // Create a handler function - handler := func() { + handler := func(payload []byte, timestamp time.Time) { select { - case ch <- true: + case ch <- ackMessage{true, payload, timestamp}: default: } } @@ -549,7 +618,7 @@ func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Dura delete(m.ackHandlers, seqNo) m.ackLock.Unlock() select { - case ch <- false: + case ch <- ackMessage{false, nil, time.Now()}: default: } }) @@ -558,7 +627,7 @@ func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Dura // setAckHandler is used to attach a handler to be invoked when an // ack with a given sequence number is received. 
If a timeout is reached, // the handler is deleted -func (m *Memberlist) setAckHandler(seqNo uint32, handler func(), timeout time.Duration) { +func (m *Memberlist) setAckHandler(seqNo uint32, handler func([]byte, time.Time), timeout time.Duration) { // Add the handler ah := &ackHandler{handler, nil} m.ackLock.Lock() @@ -574,16 +643,16 @@ func (m *Memberlist) setAckHandler(seqNo uint32, handler func(), timeout time.Du } // Invokes an Ack handler if any is associated, and reaps the handler immediately -func (m *Memberlist) invokeAckHandler(seqNo uint32) { +func (m *Memberlist) invokeAckHandler(ack ackResp, timestamp time.Time) { m.ackLock.Lock() - ah, ok := m.ackHandlers[seqNo] - delete(m.ackHandlers, seqNo) + ah, ok := m.ackHandlers[ack.SeqNo] + delete(m.ackHandlers, ack.SeqNo) m.ackLock.Unlock() if !ok { return } ah.timer.Stop() - ah.handler() + ah.handler(ack.Payload, timestamp) } // aliveNode is invoked by the network layer when we get a message about a @@ -601,6 +670,30 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { return } + // Invoke the Alive delegate if any. This can be used to filter out + // alive messages based on custom logic. For example, using a cluster name. + // Using a merge delegate is not enough, as it is possible for passive + // cluster merging to still occur. + if m.config.Alive != nil { + node := &Node{ + Name: a.Node, + Addr: a.Addr, + Port: a.Port, + Meta: a.Meta, + PMin: a.Vsn[0], + PMax: a.Vsn[1], + PCur: a.Vsn[2], + DMin: a.Vsn[3], + DMax: a.Vsn[4], + DCur: a.Vsn[5], + } + if err := m.config.Alive.NotifyAlive(node); err != nil { + m.logger.Printf("[WARN] memberlist: ignoring alive message for '%s': %s", + a.Node, err) + return + } + } + // Check if we've never seen this node before, and if not, then // store this node in our node map. 
if !ok { @@ -627,6 +720,9 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { // Add at the end and swap with the node at the offset m.nodes = append(m.nodes, state) m.nodes[offset], m.nodes[n] = m.nodes[n], m.nodes[offset] + + // Update numNodes after we've added a new node + atomic.AddUint32(&m.numNodes, 1) } // Check if this address is different than the existing node @@ -658,9 +754,6 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { return } - // Update metrics - metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1) - // Store the old state and meta data oldState := state.State oldMeta := state.Meta @@ -728,6 +821,9 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { } } + // Update metrics + metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1) + // Notify the delegate of any relevant updates if m.config.Events != nil { if oldState == stateDead { @@ -799,7 +895,7 @@ func (m *Memberlist) suspectNode(s *suspect) { state.StateChange = changeTime // Setup a timeout for this - timeout := suspicionTimeout(m.config.SuspicionMult, len(m.nodes), m.config.ProbeInterval) + timeout := suspicionTimeout(m.config.SuspicionMult, m.estNumNodes(), m.config.ProbeInterval) time.AfterFunc(timeout, func() { m.nodeLock.Lock() state, ok := m.nodeMap[s.Node] diff --git a/vendor/src/github.com/hashicorp/memberlist/util.go b/vendor/src/github.com/hashicorp/memberlist/util.go index 27f9f2a3ca..7a59e3b370 100644 --- a/vendor/src/github.com/hashicorp/memberlist/util.go +++ b/vendor/src/github.com/hashicorp/memberlist/util.go @@ -5,12 +5,14 @@ import ( "compress/lzw" "encoding/binary" "fmt" - "github.com/hashicorp/go-msgpack/codec" "io" "math" "math/rand" "net" + "strings" "time" + + "github.com/hashicorp/go-msgpack/codec" ) // pushPullScale is the minimum number of nodes @@ -23,8 +25,11 @@ const pushPullScaleThreshold = 32 /* * Contains an entry for each private block: * 10.0.0.0/8 + * 100.64.0.0/10 + * 127.0.0.0/8 + * 169.254.0.0/16 * 172.16.0.0/12 - * 192.168/16 + * 192.168.0.0/16 */ var privateBlocks []*net.IPNet @@ -40,25 +45,44 @@ func init() { rand.Seed(time.Now().UnixNano()) // Add each private block - privateBlocks = make([]*net.IPNet, 3) + privateBlocks = make([]*net.IPNet, 6) + _, block, err := net.ParseCIDR("10.0.0.0/8") if err != nil { panic(fmt.Sprintf("Bad cidr. Got %v", err)) } privateBlocks[0] = block - _, block, err = net.ParseCIDR("172.16.0.0/12") + _, block, err = net.ParseCIDR("100.64.0.0/10") if err != nil { panic(fmt.Sprintf("Bad cidr. Got %v", err)) } privateBlocks[1] = block - _, block, err = net.ParseCIDR("192.168.0.0/16") + _, block, err = net.ParseCIDR("127.0.0.0/8") if err != nil { panic(fmt.Sprintf("Bad cidr. Got %v", err)) } privateBlocks[2] = block + _, block, err = net.ParseCIDR("169.254.0.0/16") + if err != nil { + panic(fmt.Sprintf("Bad cidr. Got %v", err)) + } + privateBlocks[3] = block + + _, block, err = net.ParseCIDR("172.16.0.0/12") + if err != nil { + panic(fmt.Sprintf("Bad cidr. Got %v", err)) + } + privateBlocks[4] = block + + _, block, err = net.ParseCIDR("192.168.0.0/16") + if err != nil { + panic(fmt.Sprintf("Bad cidr. Got %v", err)) + } + privateBlocks[5] = block + _, block, err = net.ParseCIDR("127.0.0.0/8") if err != nil { panic(fmt.Sprintf("Bad cidr. 
Got %v", err)) @@ -84,6 +108,42 @@ func encode(msgType messageType, in interface{}) (*bytes.Buffer, error) { return buf, err } +// GetPrivateIP returns the first private IP address found in a list of +// addresses. +func GetPrivateIP(addresses []net.Addr) (net.IP, error) { + var candidates []net.IP + + // Find private IPv4 address + for _, rawAddr := range addresses { + var ip net.IP + switch addr := rawAddr.(type) { + case *net.IPAddr: + ip = addr.IP + case *net.IPNet: + ip = addr.IP + default: + continue + } + + if ip.To4() == nil { + continue + } + if !IsPrivateIP(ip.String()) { + continue + } + candidates = append(candidates, ip) + } + numIps := len(candidates) + switch numIps { + case 0: + return nil, fmt.Errorf("No private IP address found") + case 1: + return candidates[0], nil + default: + return nil, fmt.Errorf("Multiple private IPs found. Please configure one.") + } +} + // Returns a random offset between 0 and n func randomOffset(n int) int { if n == 0 { @@ -107,9 +167,10 @@ func retransmitLimit(retransmitMult, n int) int { return limit } -// shuffleNodes randomly shuffles the input nodes +// shuffleNodes randomly shuffles the input nodes using the Fisher-Yates shuffle func shuffleNodes(nodes []*nodeState) { - for i := range nodes { + n := len(nodes) + for i := n - 1; i > 0; i-- { j := rand.Intn(i + 1) nodes[i], nodes[j] = nodes[j], nodes[i] } @@ -250,7 +311,7 @@ func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) { } // Returns if the given IP is in a private block -func isPrivateIP(ip_str string) bool { +func IsPrivateIP(ip_str string) bool { ip := net.ParseIP(ip_str) for _, priv := range privateBlocks { if priv.Contains(ip) { @@ -266,6 +327,12 @@ func isLoopbackIP(ip_str string) bool { return loopbackBlock.Contains(ip) } +// Given a string of the form "host", "host:port", or "[ipv6::address]:port", +// return true if the string includes a port. +func hasPort(s string) bool { + return strings.LastIndex(s, ":") > strings.LastIndex(s, "]") +} + // compressPayload takes an opaque input buffer, compresses it // and wraps it in a compress{} message that is encoded. func compressPayload(inp []byte) (*bytes.Buffer, error) { diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/client.go b/vendor/src/github.com/hashicorp/serf/coordinate/client.go new file mode 100644 index 0000000000..613bfff89e --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/client.go @@ -0,0 +1,180 @@ +package coordinate + +import ( + "fmt" + "math" + "sort" + "sync" + "time" +) + +// Client manages the estimated network coordinate for a given node, and adjusts +// it as the node observes round trip times and estimated coordinates from other +// nodes. The core algorithm is based on Vivaldi, see the documentation for Config +// for more details. +type Client struct { + // coord is the current estimate of the client's network coordinate. + coord *Coordinate + + // origin is a coordinate sitting at the origin. + origin *Coordinate + + // config contains the tuning parameters that govern the performance of + // the algorithm. + config *Config + + // adjustmentIndex is the current index into the adjustmentSamples slice. + adjustmentIndex uint + + // adjustment is used to store samples for the adjustment calculation. + adjustmentSamples []float64 + + // latencyFilterSamples is used to store the last several RTT samples, + // keyed by node name. We will use the config's LatencyFilterSamples + // value to determine how many samples we keep, per node. 
+ latencyFilterSamples map[string][]float64 + + // mutex enables safe concurrent access to the client. + mutex sync.RWMutex +} + +// NewClient creates a new Client and verifies the configuration is valid. +func NewClient(config *Config) (*Client, error) { + if !(config.Dimensionality > 0) { + return nil, fmt.Errorf("dimensionality must be >0") + } + + return &Client{ + coord: NewCoordinate(config), + origin: NewCoordinate(config), + config: config, + adjustmentIndex: 0, + adjustmentSamples: make([]float64, config.AdjustmentWindowSize), + latencyFilterSamples: make(map[string][]float64), + }, nil +} + +// GetCoordinate returns a copy of the coordinate for this client. +func (c *Client) GetCoordinate() *Coordinate { + c.mutex.RLock() + defer c.mutex.RUnlock() + + return c.coord.Clone() +} + +// SetCoordinate forces the client's coordinate to a known state. +func (c *Client) SetCoordinate(coord *Coordinate) { + c.mutex.Lock() + defer c.mutex.Unlock() + + c.coord = coord.Clone() +} + +// ForgetNode removes any client state for the given node. +func (c *Client) ForgetNode(node string) { + c.mutex.Lock() + defer c.mutex.Unlock() + + delete(c.latencyFilterSamples, node) +} + +// latencyFilter applies a simple moving median filter with a new sample for +// a node. This assumes that the mutex has been locked already. +func (c *Client) latencyFilter(node string, rttSeconds float64) float64 { + samples, ok := c.latencyFilterSamples[node] + if !ok { + samples = make([]float64, 0, c.config.LatencyFilterSize) + } + + // Add the new sample and trim the list, if needed. + samples = append(samples, rttSeconds) + if len(samples) > int(c.config.LatencyFilterSize) { + samples = samples[1:] + } + c.latencyFilterSamples[node] = samples + + // Sort a copy of the samples and return the median. + sorted := make([]float64, len(samples)) + copy(sorted, samples) + sort.Float64s(sorted) + return sorted[len(sorted)/2] +} + +// updateVivialdi updates the Vivaldi portion of the client's coordinate. This +// assumes that the mutex has been locked already. +func (c *Client) updateVivaldi(other *Coordinate, rttSeconds float64) { + const zeroThreshold = 1.0e-6 + + dist := c.coord.DistanceTo(other).Seconds() + if rttSeconds < zeroThreshold { + rttSeconds = zeroThreshold + } + wrongness := math.Abs(dist-rttSeconds) / rttSeconds + + totalError := c.coord.Error + other.Error + if totalError < zeroThreshold { + totalError = zeroThreshold + } + weight := c.coord.Error / totalError + + c.coord.Error = c.config.VivaldiCE*weight*wrongness + c.coord.Error*(1.0-c.config.VivaldiCE*weight) + if c.coord.Error > c.config.VivaldiErrorMax { + c.coord.Error = c.config.VivaldiErrorMax + } + + delta := c.config.VivaldiCC * weight + force := delta * (rttSeconds - dist) + c.coord = c.coord.ApplyForce(c.config, force, other) +} + +// updateAdjustment updates the adjustment portion of the client's coordinate, if +// the feature is enabled. This assumes that the mutex has been locked already. +func (c *Client) updateAdjustment(other *Coordinate, rttSeconds float64) { + if c.config.AdjustmentWindowSize == 0 { + return + } + + // Note that the existing adjustment factors don't figure in to this + // calculation so we use the raw distance here. 
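// Illustrative sketch (not part of the vendored source): feeding an observed
// round-trip time into a coordinate.Client and then estimating the RTT to a peer from
// the coordinates alone, using the Client API above. The peer name and coordinate are
// placeholders; DefaultConfig comes from the coordinate config added later in this
// patch.
package coordinateexample

import (
	"log"
	"time"

	"github.com/hashicorp/serf/coordinate"
)

func estimate(peer string, peerCoord *coordinate.Coordinate, observedRTT time.Duration) {
	client, err := coordinate.NewClient(coordinate.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Each observation nudges our own coordinate (Vivaldi update, adjustment
	// window, gravity); the updated coordinate is also returned.
	client.Update(peer, peerCoord, observedRTT)

	// Once coordinates have converged, DistanceTo yields an RTT estimate
	// without sending any probe.
	log.Printf("estimated RTT to %s: %v", peer, client.DistanceTo(peerCoord))
}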
+ dist := c.coord.rawDistanceTo(other) + c.adjustmentSamples[c.adjustmentIndex] = rttSeconds - dist + c.adjustmentIndex = (c.adjustmentIndex + 1) % c.config.AdjustmentWindowSize + + sum := 0.0 + for _, sample := range c.adjustmentSamples { + sum += sample + } + c.coord.Adjustment = sum / (2.0 * float64(c.config.AdjustmentWindowSize)) +} + +// updateGravity applies a small amount of gravity to pull coordinates towards +// the center of the coordinate system to combat drift. This assumes that the +// mutex is locked already. +func (c *Client) updateGravity() { + dist := c.origin.DistanceTo(c.coord).Seconds() + force := -1.0 * math.Pow(dist/c.config.GravityRho, 2.0) + c.coord = c.coord.ApplyForce(c.config, force, c.origin) +} + +// Update takes other, a coordinate for another node, and rtt, a round trip +// time observation for a ping to that node, and updates the estimated position of +// the client's coordinate. Returns the updated coordinate. +func (c *Client) Update(node string, other *Coordinate, rtt time.Duration) *Coordinate { + c.mutex.Lock() + defer c.mutex.Unlock() + + rttSeconds := c.latencyFilter(node, rtt.Seconds()) + c.updateVivaldi(other, rttSeconds) + c.updateAdjustment(other, rttSeconds) + c.updateGravity() + return c.coord.Clone() +} + +// DistanceTo returns the estimated RTT from the client's coordinate to other, the +// coordinate for another node. +func (c *Client) DistanceTo(other *Coordinate) time.Duration { + c.mutex.RLock() + defer c.mutex.RUnlock() + + return c.coord.DistanceTo(other) +} diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/config.go b/vendor/src/github.com/hashicorp/serf/coordinate/config.go new file mode 100644 index 0000000000..a5b3aadfe4 --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/config.go @@ -0,0 +1,70 @@ +package coordinate + +// Config is used to set the parameters of the Vivaldi-based coordinate mapping +// algorithm. +// +// The following references are called out at various points in the documentation +// here: +// +// [1] Dabek, Frank, et al. "Vivaldi: A decentralized network coordinate system." +// ACM SIGCOMM Computer Communication Review. Vol. 34. No. 4. ACM, 2004. +// [2] Ledlie, Jonathan, Paul Gardner, and Margo I. Seltzer. "Network Coordinates +// in the Wild." NSDI. Vol. 7. 2007. +// [3] Lee, Sanghwan, et al. "On suitability of Euclidean embedding for +// host-based network coordinate systems." Networking, IEEE/ACM Transactions +// on 18.1 (2010): 27-40. +type Config struct { + // The dimensionality of the coordinate system. As discussed in [2], more + // dimensions improves the accuracy of the estimates up to a point. Per [2] + // we chose 4 dimensions plus a non-Euclidean height. + Dimensionality uint + + // VivaldiErrorMax is the default error value when a node hasn't yet made + // any observations. It also serves as an upper limit on the error value in + // case observations cause the error value to increase without bound. + VivaldiErrorMax float64 + + // VivaldiCE is a tuning factor that controls the maximum impact an + // observation can have on a node's confidence. See [1] for more details. + VivaldiCE float64 + + // VivaldiCC is a tuning factor that controls the maximum impact an + // observation can have on a node's coordinate. See [1] for more details. + VivaldiCC float64 + + // AdjustmentWindowSize is a tuning factor that determines how many samples + // we retain to calculate the adjustment factor as discussed in [3]. Setting + // this to zero disables this feature. 
+ AdjustmentWindowSize uint + + // HeightMin is the minimum value of the height parameter. Since this + // always must be positive, it will introduce a small amount error, so + // the chosen value should be relatively small compared to "normal" + // coordinates. + HeightMin float64 + + // LatencyFilterSamples is the maximum number of samples that are retained + // per node, in order to compute a median. The intent is to ride out blips + // but still keep the delay low, since our time to probe any given node is + // pretty infrequent. See [2] for more details. + LatencyFilterSize uint + + // GravityRho is a tuning factor that sets how much gravity has an effect + // to try to re-center coordinates. See [2] for more details. + GravityRho float64 +} + +// DefaultConfig returns a Config that has some default values suitable for +// basic testing of the algorithm, but not tuned to any particular type of cluster. +func DefaultConfig() *Config { + return &Config{ + Dimensionality: 8, + VivaldiErrorMax: 1.5, + VivaldiCE: 0.25, + VivaldiCC: 0.25, + AdjustmentWindowSize: 20, + HeightMin: 10.0e-6, + LatencyFilterSize: 3, + GravityRho: 150.0, + } +} diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/coordinate.go b/vendor/src/github.com/hashicorp/serf/coordinate/coordinate.go new file mode 100644 index 0000000000..c9194e048b --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/coordinate.go @@ -0,0 +1,183 @@ +package coordinate + +import ( + "math" + "math/rand" + "time" +) + +// Coordinate is a specialized structure for holding network coordinates for the +// Vivaldi-based coordinate mapping algorithm. All of the fields should be public +// to enable this to be serialized. All values in here are in units of seconds. +type Coordinate struct { + // Vec is the Euclidean portion of the coordinate. This is used along + // with the other fields to provide an overall distance estimate. The + // units here are seconds. + Vec []float64 + + // Err reflects the confidence in the given coordinate and is updated + // dynamically by the Vivaldi Client. This is dimensionless. + Error float64 + + // Adjustment is a distance offset computed based on a calculation over + // observations from all other nodes over a fixed window and is updated + // dynamically by the Vivaldi Client. The units here are seconds. + Adjustment float64 + + // Height is a distance offset that accounts for non-Euclidean effects + // which model the access links from nodes to the core Internet. The access + // links are usually set by bandwidth and congestion, and the core links + // usually follow distance based on geography. + Height float64 +} + +const ( + // secondsToNanoseconds is used to convert float seconds to nanoseconds. + secondsToNanoseconds = 1.0e9 + + // zeroThreshold is used to decide if two coordinates are on top of each + // other. + zeroThreshold = 1.0e-6 +) + +// ErrDimensionalityConflict will be panic-d if you try to perform operations +// with incompatible dimensions. +type DimensionalityConflictError struct{} + +// Adds the error interface. +func (e DimensionalityConflictError) Error() string { + return "coordinate dimensionality does not match" +} + +// NewCoordinate creates a new coordinate at the origin, using the given config +// to supply key initial values. 
+func NewCoordinate(config *Config) *Coordinate { + return &Coordinate{ + Vec: make([]float64, config.Dimensionality), + Error: config.VivaldiErrorMax, + Adjustment: 0.0, + Height: config.HeightMin, + } +} + +// Clone creates an independent copy of this coordinate. +func (c *Coordinate) Clone() *Coordinate { + vec := make([]float64, len(c.Vec)) + copy(vec, c.Vec) + return &Coordinate{ + Vec: vec, + Error: c.Error, + Adjustment: c.Adjustment, + Height: c.Height, + } +} + +// IsCompatibleWith checks to see if the two coordinates are compatible +// dimensionally. If this returns true then you are guaranteed to not get +// any runtime errors operating on them. +func (c *Coordinate) IsCompatibleWith(other *Coordinate) bool { + return len(c.Vec) == len(other.Vec) +} + +// ApplyForce returns the result of applying the force from the direction of the +// other coordinate. +func (c *Coordinate) ApplyForce(config *Config, force float64, other *Coordinate) *Coordinate { + if !c.IsCompatibleWith(other) { + panic(DimensionalityConflictError{}) + } + + ret := c.Clone() + unit, mag := unitVectorAt(c.Vec, other.Vec) + ret.Vec = add(ret.Vec, mul(unit, force)) + if mag > zeroThreshold { + ret.Height = (ret.Height+other.Height)*force/mag + ret.Height + ret.Height = math.Max(ret.Height, config.HeightMin) + } + return ret +} + +// DistanceTo returns the distance between this coordinate and the other +// coordinate, including adjustments. +func (c *Coordinate) DistanceTo(other *Coordinate) time.Duration { + if !c.IsCompatibleWith(other) { + panic(DimensionalityConflictError{}) + } + + dist := c.rawDistanceTo(other) + adjustedDist := dist + c.Adjustment + other.Adjustment + if adjustedDist > 0.0 { + dist = adjustedDist + } + return time.Duration(dist * secondsToNanoseconds) +} + +// rawDistanceTo returns the Vivaldi distance between this coordinate and the +// other coordinate in seconds, not including adjustments. This assumes the +// dimensions have already been checked to be compatible. +func (c *Coordinate) rawDistanceTo(other *Coordinate) float64 { + return magnitude(diff(c.Vec, other.Vec)) + c.Height + other.Height +} + +// add returns the sum of vec1 and vec2. This assumes the dimensions have +// already been checked to be compatible. +func add(vec1 []float64, vec2 []float64) []float64 { + ret := make([]float64, len(vec1)) + for i, _ := range ret { + ret[i] = vec1[i] + vec2[i] + } + return ret +} + +// diff returns the difference between the vec1 and vec2. This assumes the +// dimensions have already been checked to be compatible. +func diff(vec1 []float64, vec2 []float64) []float64 { + ret := make([]float64, len(vec1)) + for i, _ := range ret { + ret[i] = vec1[i] - vec2[i] + } + return ret +} + +// mul returns vec multiplied by a scalar factor. +func mul(vec []float64, factor float64) []float64 { + ret := make([]float64, len(vec)) + for i, _ := range vec { + ret[i] = vec[i] * factor + } + return ret +} + +// magnitude computes the magnitude of the vec. +func magnitude(vec []float64) float64 { + sum := 0.0 + for i, _ := range vec { + sum += vec[i] * vec[i] + } + return math.Sqrt(sum) +} + +// unitVectorAt returns a unit vector pointing at vec1 from vec2. If the two +// positions are the same then a random unit vector is returned. We also return +// the distance between the points for use in the later height calculation. +func unitVectorAt(vec1 []float64, vec2 []float64) ([]float64, float64) { + ret := diff(vec1, vec2) + + // If the coordinates aren't on top of each other we can normalize. 
+ if mag := magnitude(ret); mag > zeroThreshold { + return mul(ret, 1.0/mag), mag + } + + // Otherwise, just return a random unit vector. + for i, _ := range ret { + ret[i] = rand.Float64() - 0.5 + } + if mag := magnitude(ret); mag > zeroThreshold { + return mul(ret, 1.0/mag), 0.0 + } + + // And finally just give up and make a unit vector along the first + // dimension. This should be exceedingly rare. + ret = make([]float64, len(ret)) + ret[0] = 1.0 + return ret, 0.0 +} diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/phantom.go b/vendor/src/github.com/hashicorp/serf/coordinate/phantom.go new file mode 100644 index 0000000000..6fb033c0cd --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/phantom.go @@ -0,0 +1,187 @@ +package coordinate + +import ( + "fmt" + "math" + "math/rand" + "time" +) + +// GenerateClients returns a slice with nodes number of clients, all with the +// given config. +func GenerateClients(nodes int, config *Config) ([]*Client, error) { + clients := make([]*Client, nodes) + for i, _ := range clients { + client, err := NewClient(config) + if err != nil { + return nil, err + } + + clients[i] = client + } + return clients, nil +} + +// GenerateLine returns a truth matrix as if all the nodes are in a straight linke +// with the given spacing between them. +func GenerateLine(nodes int, spacing time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + rtt := time.Duration(j-i) * spacing + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateGrid returns a truth matrix as if all the nodes are in a two dimensional +// grid with the given spacing between them. +func GenerateGrid(nodes int, spacing time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + n := int(math.Sqrt(float64(nodes))) + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + x1, y1 := float64(i%n), float64(i/n) + x2, y2 := float64(j%n), float64(j/n) + dx, dy := x2-x1, y2-y1 + dist := math.Sqrt(dx*dx + dy*dy) + rtt := time.Duration(dist * float64(spacing)) + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateSplit returns a truth matrix as if half the nodes are close together in +// one location and half the nodes are close together in another. The lan factor +// is used to separate the nodes locally and the wan factor represents the split +// between the two sides. +func GenerateSplit(nodes int, lan time.Duration, wan time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + split := nodes / 2 + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + rtt := lan + if (i <= split && j > split) || (i > split && j <= split) { + rtt += wan + } + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateCircle returns a truth matrix for a set of nodes, evenly distributed +// around a circle with the given radius. The first node is at the "center" of the +// circle because it's equidistant from all the other nodes, but we place it at +// double the radius, so it should show up above all the other nodes in height. 
+func GenerateCircle(nodes int, radius time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + var rtt time.Duration + if i == 0 { + rtt = 2 * radius + } else { + t1 := 2.0 * math.Pi * float64(i) / float64(nodes) + x1, y1 := math.Cos(t1), math.Sin(t1) + t2 := 2.0 * math.Pi * float64(j) / float64(nodes) + x2, y2 := math.Cos(t2), math.Sin(t2) + dx, dy := x2-x1, y2-y1 + dist := math.Sqrt(dx*dx + dy*dy) + rtt = time.Duration(dist * float64(radius)) + } + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateRandom returns a truth matrix for a set of nodes with normally +// distributed delays, with the given mean and deviation. The RNG is re-seeded +// so you always get the same matrix for a given size. +func GenerateRandom(nodes int, mean time.Duration, deviation time.Duration) [][]time.Duration { + rand.Seed(1) + + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + rttSeconds := rand.NormFloat64()*deviation.Seconds() + mean.Seconds() + rtt := time.Duration(rttSeconds * secondsToNanoseconds) + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// Simulate runs the given number of cycles using the given list of clients and +// truth matrix. On each cycle, each client will pick a random node and observe +// the truth RTT, updating its coordinate estimate. The RNG is re-seeded for +// each simulation run to get deterministic results (for this algorithm and the +// underlying algorithm which will use random numbers for position vectors when +// starting out with everything at the origin). +func Simulate(clients []*Client, truth [][]time.Duration, cycles int) { + rand.Seed(1) + + nodes := len(clients) + for cycle := 0; cycle < cycles; cycle++ { + for i, _ := range clients { + if j := rand.Intn(nodes); j != i { + c := clients[j].GetCoordinate() + rtt := truth[i][j] + node := fmt.Sprintf("node_%d", j) + clients[i].Update(node, c, rtt) + } + } + } +} + +// Stats is returned from the Evaluate function with a summary of the algorithm +// performance. +type Stats struct { + ErrorMax float64 + ErrorAvg float64 +} + +// Evaluate uses the coordinates of the given clients to calculate estimated +// distances and compares them with the given truth matrix, returning summary +// stats. +func Evaluate(clients []*Client, truth [][]time.Duration) (stats Stats) { + nodes := len(clients) + count := 0 + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + est := clients[i].DistanceTo(clients[j].GetCoordinate()).Seconds() + actual := truth[i][j].Seconds() + error := math.Abs(est-actual) / actual + stats.ErrorMax = math.Max(stats.ErrorMax, error) + stats.ErrorAvg += error + count += 1 + } + } + + stats.ErrorAvg /= float64(count) + fmt.Printf("Error avg=%9.6f max=%9.6f\n", stats.ErrorAvg, stats.ErrorMax) + return +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/config.go b/vendor/src/github.com/hashicorp/serf/serf/config.go index e2a43f549f..87cba9f7ca 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/config.go +++ b/vendor/src/github.com/hashicorp/serf/serf/config.go @@ -149,6 +149,14 @@ type Config struct { // QueryTimeoutMult int + // QueryResponseSizeLimit and QuerySizeLimit limit the inbound and + // outbound payload sizes for queries, respectively. 
These must fit + // in a UDP packet with some additional overhead, so tuning these + // past the default values of 1024 will depend on your network + // configuration. + QueryResponseSizeLimit int + QuerySizeLimit int + // MemberlistConfig is the memberlist configuration that Serf will // use to do the underlying membership management and gossip. Some // fields in the MemberlistConfig will be overwritten by Serf no @@ -189,6 +197,12 @@ type Config struct { // node stays while the other node will leave the cluster and exit. EnableNameConflictResolution bool + // DisableCoordinates controls if Serf will maintain an estimate of this + // node's network coordinate internally. A network coordinate is useful + // for estimating the network distance (i.e. round trip time) between + // two nodes. Enabling this option adds some overhead to ping messages. + DisableCoordinates bool + // KeyringFile provides the location of a writable file where Serf can // persist changes to the encryption keyring. KeyringFile string @@ -229,6 +243,9 @@ func DefaultConfig() *Config { TombstoneTimeout: 24 * time.Hour, MemberlistConfig: memberlist.DefaultLANConfig(), QueryTimeoutMult: 16, + QueryResponseSizeLimit: 1024, + QuerySizeLimit: 1024, EnableNameConflictResolution: true, + DisableCoordinates: false, } } diff --git a/vendor/src/github.com/hashicorp/serf/serf/delegate.go b/vendor/src/github.com/hashicorp/serf/serf/delegate.go index 4ffedbd3d0..d19ca3090f 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/delegate.go +++ b/vendor/src/github.com/hashicorp/serf/serf/delegate.go @@ -2,6 +2,7 @@ package serf import ( "fmt" + "github.com/armon/go-metrics" ) @@ -170,6 +171,12 @@ func (d *delegate) LocalState(join bool) []byte { } func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) { + // Ensure we have a message + if len(buf) == 0 { + d.serf.logger.Printf("[ERR] serf: Remote state is zero bytes") + return + } + // Check the message type if messageType(buf[0]) != messagePushPullType { d.serf.logger.Printf("[ERR] serf: Remote state has bad type prefix: %v", buf[0]) diff --git a/vendor/src/github.com/hashicorp/serf/serf/event.go b/vendor/src/github.com/hashicorp/serf/serf/event.go index 5c6ff740e2..8337e95ead 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/event.go +++ b/vendor/src/github.com/hashicorp/serf/serf/event.go @@ -152,8 +152,8 @@ func (q *Query) Respond(buf []byte) error { } // Check the size limit - if len(raw) > QueryResponseSizeLimit { - return fmt.Errorf("response exceeds limit of %d bytes", QueryResponseSizeLimit) + if len(raw) > q.serf.config.QueryResponseSizeLimit { + return fmt.Errorf("response exceeds limit of %d bytes", q.serf.config.QueryResponseSizeLimit) } // Send the response diff --git a/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go b/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go index ece3e9767f..7fdc732887 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go +++ b/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go @@ -7,29 +7,38 @@ import ( ) type MergeDelegate interface { - NotifyMerge([]*Member) (cancel bool) + NotifyMerge([]*Member) error } type mergeDelegate struct { serf *Serf } -func (m *mergeDelegate) NotifyMerge(nodes []*memberlist.Node) (cancel bool) { +func (m *mergeDelegate) NotifyMerge(nodes []*memberlist.Node) error { members := make([]*Member, len(nodes)) for idx, n := range nodes { - members[idx] = &Member{ - Name: n.Name, - Addr: net.IP(n.Addr), - Port: n.Port, - Tags: m.serf.decodeTags(n.Meta), - 
Status: StatusNone, - ProtocolMin: n.PMin, - ProtocolMax: n.PMax, - ProtocolCur: n.PCur, - DelegateMin: n.DMin, - DelegateMax: n.DMax, - DelegateCur: n.DCur, - } + members[idx] = m.nodeToMember(n) } return m.serf.config.Merge.NotifyMerge(members) } + +func (m *mergeDelegate) NotifyAlive(peer *memberlist.Node) error { + member := m.nodeToMember(peer) + return m.serf.config.Merge.NotifyMerge([]*Member{member}) +} + +func (m *mergeDelegate) nodeToMember(n *memberlist.Node) *Member { + return &Member{ + Name: n.Name, + Addr: net.IP(n.Addr), + Port: n.Port, + Tags: m.serf.decodeTags(n.Meta), + Status: StatusNone, + ProtocolMin: n.PMin, + ProtocolMax: n.PMax, + ProtocolCur: n.PCur, + DelegateMin: n.DMin, + DelegateMax: n.DMax, + DelegateCur: n.DCur, + } +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/ping_delegate.go b/vendor/src/github.com/hashicorp/serf/serf/ping_delegate.go new file mode 100644 index 0000000000..a482685a20 --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/serf/ping_delegate.go @@ -0,0 +1,89 @@ +package serf + +import ( + "bytes" + "log" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/memberlist" + "github.com/hashicorp/serf/coordinate" +) + +// pingDelegate is notified when memberlist successfully completes a direct ping +// of a peer node. We use this to update our estimated network coordinate, as +// well as cache the coordinate of the peer. +type pingDelegate struct { + serf *Serf +} + +const ( + // PingVersion is an internal version for the ping message, above the normal + // versioning we get from the protocol version. This enables small updates + // to the ping message without a full protocol bump. + PingVersion = 1 +) + +// AckPayload is called to produce a payload to send back in response to a ping +// request. +func (p *pingDelegate) AckPayload() []byte { + var buf bytes.Buffer + + // The first byte is the version number, forming a simple header. + version := []byte{PingVersion} + buf.Write(version) + + // The rest of the message is the serialized coordinate. + enc := codec.NewEncoder(&buf, &codec.MsgpackHandle{}) + if err := enc.Encode(p.serf.coordClient.GetCoordinate()); err != nil { + log.Printf("[ERR] serf: Failed to encode coordinate: %v\n", err) + } + return buf.Bytes() +} + +// NotifyPingComplete is called when this node successfully completes a direct ping +// of a peer node. +func (p *pingDelegate) NotifyPingComplete(other *memberlist.Node, rtt time.Duration, payload []byte) { + if payload == nil || len(payload) == 0 { + return + } + + // Verify ping version in the header. + version := payload[0] + if version != PingVersion { + log.Printf("[ERR] serf: Unsupported ping version: %v", version) + return + } + + // Process the remainder of the message as a coordinate. + r := bytes.NewReader(payload[1:]) + dec := codec.NewDecoder(r, &codec.MsgpackHandle{}) + var coord coordinate.Coordinate + if err := dec.Decode(&coord); err != nil { + log.Printf("[ERR] serf: Failed to decode coordinate from ping: %v", err) + } + + // Apply the update. Since this is a coordinate coming from some place + // else we harden this and look for dimensionality problems proactively. + before := p.serf.coordClient.GetCoordinate() + if before.IsCompatibleWith(&coord) { + after := p.serf.coordClient.Update(other.Name, &coord, rtt) + + // Publish some metrics to give us an idea of how much we are + // adjusting each time we update. 
+ d := float32(before.DistanceTo(after).Seconds() * 1.0e3) + metrics.AddSample([]string{"serf", "coordinate", "adjustment-ms"}, d) + + // Cache the coordinate for the other node, and add our own + // to the cache as well since it just got updated. This lets + // users call GetCachedCoordinate with our node name, which is + // more friendly. + p.serf.coordCacheLock.Lock() + p.serf.coordCache[other.Name] = &coord + p.serf.coordCache[p.serf.config.NodeName] = p.serf.coordClient.GetCoordinate() + p.serf.coordCacheLock.Unlock() + } else { + log.Printf("[ERR] serf: Rejected bad coordinate: %v\n", coord) + } +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/serf.go b/vendor/src/github.com/hashicorp/serf/serf/serf.go index a40ad06fbc..613b915dc4 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/serf.go +++ b/vendor/src/github.com/hashicorp/serf/serf/serf.go @@ -17,6 +17,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/go-msgpack/codec" "github.com/hashicorp/memberlist" + "github.com/hashicorp/serf/coordinate" ) // These are the protocol versions that Serf can _understand_. These are @@ -91,6 +92,10 @@ type Serf struct { snapshotter *Snapshotter keyManager *KeyManager + + coordClient *coordinate.Client + coordCache map[string]*coordinate.Coordinate + coordCacheLock sync.RWMutex } // SerfState is the state of the Serf instance. @@ -209,10 +214,8 @@ type queries struct { } const ( - UserEventSizeLimit = 512 // Maximum byte size for event name and payload - QuerySizeLimit = 1024 // Maximum byte size for query - QueryResponseSizeLimit = 1024 // Maximum bytes size for response - snapshotSizeLimit = 128 * 1024 // Maximum 128 KB snapshot + UserEventSizeLimit = 512 // Maximum byte size for event name and payload + snapshotSizeLimit = 128 * 1024 // Maximum 128 KB snapshot ) // Create creates a new Serf instance, starting all the background tasks @@ -274,15 +277,25 @@ func Create(conf *Config) (*Serf, error) { } conf.EventCh = outCh + // Set up network coordinate client. + if !conf.DisableCoordinates { + serf.coordClient, err = coordinate.NewClient(coordinate.DefaultConfig()) + if err != nil { + return nil, fmt.Errorf("Failed to create coordinate client: %v", err) + } + } + // Try access the snapshot var oldClock, oldEventClock, oldQueryClock LamportTime var prev []*PreviousNode if conf.SnapshotPath != "" { - eventCh, snap, err := NewSnapshotter(conf.SnapshotPath, + eventCh, snap, err := NewSnapshotter( + conf.SnapshotPath, snapshotSizeLimit, conf.RejoinAfterLeave, serf.logger, &serf.clock, + serf.coordClient, conf.EventCh, serf.shutdownCh) if err != nil { @@ -298,6 +311,13 @@ func Create(conf *Config) (*Serf, error) { serf.queryMinTime = oldQueryClock + 1 } + // Set up the coordinate cache. We do this after we read the snapshot to + // make sure we get a good initial value from there, if we got one. + if !conf.DisableCoordinates { + serf.coordCache = make(map[string]*coordinate.Coordinate) + serf.coordCache[conf.NodeName] = serf.coordClient.GetCoordinate() + } + // Setup the various broadcast queues, which we use to send our own // custom broadcasts along the gossip channel. 
serf.broadcasts = &memberlist.TransmitLimitedQueue{ @@ -347,17 +367,22 @@ func Create(conf *Config) (*Serf, error) { conf.MemberlistConfig.DelegateProtocolMax = ProtocolVersionMax conf.MemberlistConfig.Name = conf.NodeName conf.MemberlistConfig.ProtocolVersion = ProtocolVersionMap[conf.ProtocolVersion] + if !conf.DisableCoordinates { + conf.MemberlistConfig.Ping = &pingDelegate{serf: serf} + } // Setup a merge delegate if necessary if conf.Merge != nil { - conf.MemberlistConfig.Merge = &mergeDelegate{serf: serf} + md := &mergeDelegate{serf: serf} + conf.MemberlistConfig.Merge = md + conf.MemberlistConfig.Alive = md } // Create the underlying memberlist that will manage membership // and failure detection for the Serf instance. memberlist, err := memberlist.Create(conf.MemberlistConfig) if err != nil { - return nil, err + return nil, fmt.Errorf("Failed to create memberlist: %v", err) } serf.memberlist = memberlist @@ -486,8 +511,8 @@ func (s *Serf) Query(name string, payload []byte, params *QueryParam) (*QueryRes } // Check the size - if len(raw) > QuerySizeLimit { - return nil, fmt.Errorf("query exceeds limit of %d bytes", QuerySizeLimit) + if len(raw) > s.config.QuerySizeLimit { + return nil, fmt.Errorf("query exceeds limit of %d bytes", s.config.QuerySizeLimit) } // Register QueryResponse to track acks and responses @@ -950,6 +975,19 @@ func (s *Serf) handleNodeUpdate(n *memberlist.Node) { member.Port = n.Port member.Tags = s.decodeTags(n.Meta) + // Snag the latest versions. NOTE - the current memberlist code will NOT + // fire an update event if the metadata (for Serf, tags) stays the same + // and only the protocol versions change. If we wake any Serf-level + // protocol changes where we want to get this event under those + // circumstances, we will need to update memberlist to do a check of + // versions as well as the metadata. + member.ProtocolMin = n.PMin + member.ProtocolMax = n.PMax + member.ProtocolCur = n.PCur + member.DelegateMin = n.DMin + member.DelegateMax = n.DMax + member.DelegateCur = n.DCur + // Update some metrics metrics.IncrCounter([]string{"serf", "member", "update"}, 1) @@ -1016,6 +1054,17 @@ func (s *Serf) handleNodeLeaveIntent(leaveMsg *messageLeave) bool { s.failedMembers = removeOldMember(s.failedMembers, member.Name) s.leftMembers = append(s.leftMembers, member) + // We must push a message indicating the node has now + // left to allow higher-level applications to handle the + // graceful leave. + s.logger.Printf("[INFO] serf: EventMemberLeave (forced): %s %s", + member.Member.Name, member.Member.Addr) + if s.config.EventCh != nil { + s.config.EventCh <- MemberEvent{ + Type: EventMemberLeave, + Members: []Member{member.Member}, + } + } return true default: return false @@ -1384,6 +1433,16 @@ func (s *Serf) reap(old []*memberState, timeout time.Duration) []*memberState { // Delete from members delete(s.members, m.Name) + // Tell the coordinate client the node has gone away and delete + // its cached coordinates. + if !s.config.DisableCoordinates { + s.coordClient.ForgetNode(m.Name) + + s.coordCacheLock.Lock() + delete(s.coordCache, m.Name) + s.coordCacheLock.Unlock() + } + // Send an event along s.logger.Printf("[INFO] serf: EventMemberReap: %s", m.Name) if s.config.EventCh != nil { @@ -1596,3 +1655,38 @@ func (s *Serf) writeKeyringFile() error { // Success! return nil } + +// GetCoordinate returns the network coordinate of the local node. 
+func (s *Serf) GetCoordinate() (*coordinate.Coordinate, error) { + if !s.config.DisableCoordinates { + return s.coordClient.GetCoordinate(), nil + } + + return nil, fmt.Errorf("Coordinates are disabled") +} + +// GetCachedCoordinate returns the network coordinate for the node with the given +// name. This will only be valid if DisableCoordinates is set to false. +func (s *Serf) GetCachedCoordinate(name string) (coord *coordinate.Coordinate, ok bool) { + if !s.config.DisableCoordinates { + s.coordCacheLock.RLock() + defer s.coordCacheLock.RUnlock() + if coord, ok = s.coordCache[name]; ok { + return coord, true + } + + return nil, false + } + + return nil, false +} + +// NumNodes returns the number of nodes in the serf cluster, regardless of +// their health or status. +func (s *Serf) NumNodes() (numNodes int) { + s.memberLock.RLock() + numNodes = len(s.members) + s.memberLock.RUnlock() + + return numNodes +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/snapshot.go b/vendor/src/github.com/hashicorp/serf/serf/snapshot.go index 3a1eb92ce2..44f8a5175a 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/snapshot.go +++ b/vendor/src/github.com/hashicorp/serf/serf/snapshot.go @@ -2,6 +2,7 @@ package serf import ( "bufio" + "encoding/json" "fmt" "log" "math/rand" @@ -12,6 +13,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/hashicorp/serf/coordinate" ) /* @@ -27,6 +29,7 @@ old events. const flushInterval = 500 * time.Millisecond const clockUpdateInterval = 500 * time.Millisecond +const coordinateUpdateInterval = 60 * time.Second const tmpExt = ".compact" // Snapshotter is responsible for ingesting events and persisting @@ -34,6 +37,7 @@ const tmpExt = ".compact" type Snapshotter struct { aliveNodes map[string]string clock *LamportClock + coordClient *coordinate.Client fh *os.File buffered *bufio.Writer inCh <-chan Event @@ -74,6 +78,7 @@ func NewSnapshotter(path string, rejoinAfterLeave bool, logger *log.Logger, clock *LamportClock, + coordClient *coordinate.Client, outCh chan<- Event, shutdownCh <-chan struct{}) (chan<- Event, *Snapshotter, error) { inCh := make(chan Event, 1024) @@ -96,6 +101,7 @@ func NewSnapshotter(path string, snap := &Snapshotter{ aliveNodes: make(map[string]string), clock: clock, + coordClient: coordClient, fh: fh, buffered: bufio.NewWriter(fh), inCh: inCh, @@ -171,6 +177,12 @@ func (s *Snapshotter) Leave() { // stream is a long running routine that is used to handle events func (s *Snapshotter) stream() { + clockTicker := time.NewTicker(clockUpdateInterval) + defer clockTicker.Stop() + + coordinateTicker := time.NewTicker(coordinateUpdateInterval) + defer coordinateTicker.Stop() + for { select { case <-s.leaveCh: @@ -209,9 +221,12 @@ func (s *Snapshotter) stream() { s.logger.Printf("[ERR] serf: Unknown event to snapshot: %#v", e) } - case <-time.After(clockUpdateInterval): + case <-clockTicker.C: s.updateClock() + case <-coordinateTicker.C: + s.updateCoordinate() + case <-s.shutdownCh: if err := s.buffered.Flush(); err != nil { s.logger.Printf("[ERR] serf: failed to flush snapshot: %v", err) @@ -258,6 +273,20 @@ func (s *Snapshotter) updateClock() { } } +// updateCoordinate is called periodically to write out the current local +// coordinate. It's safe to call this if coordinates aren't enabled (nil +// client) and it will be a no-op. 
+func (s *Snapshotter) updateCoordinate() { + if s.coordClient != nil { + encoded, err := json.Marshal(s.coordClient.GetCoordinate()) + if err != nil { + s.logger.Printf("[ERR] serf: Failed to encode coordinate: %v", err) + } else { + s.tryAppend(fmt.Sprintf("coordinate: %s\n", encoded)) + } + } +} + // processUserEvent is used to handle a single user event func (s *Snapshotter) processUserEvent(e UserEvent) { // Ignore old clocks @@ -362,6 +391,23 @@ func (s *Snapshotter) compact() error { } offset += int64(n) + // Write out the coordinate. + if s.coordClient != nil { + encoded, err := json.Marshal(s.coordClient.GetCoordinate()) + if err != nil { + fh.Close() + return err + } + + line = fmt.Sprintf("coordinate: %s\n", encoded) + n, err = buf.WriteString(line) + if err != nil { + fh.Close() + return err + } + offset += int64(n) + } + // Flush the new snapshot err = buf.Flush() fh.Close() @@ -473,6 +519,20 @@ func (s *Snapshotter) replay() error { } s.lastQueryClock = LamportTime(timeInt) + } else if strings.HasPrefix(line, "coordinate: ") { + if s.coordClient == nil { + s.logger.Printf("[WARN] serf: Ignoring snapshot coordinates since they are disabled") + continue + } + + coordStr := strings.TrimPrefix(line, "coordinate: ") + var coord coordinate.Coordinate + err := json.Unmarshal([]byte(coordStr), &coord) + if err != nil { + s.logger.Printf("[WARN] serf: Failed to decode coordinate: %v", err) + continue + } + s.coordClient.SetCoordinate(&coord) } else if line == "leave" { // Ignore a leave if we plan on re-joining if s.rejoinAfterLeave { diff --git a/vendor/src/github.com/hashicorp/serf/website/LICENSE.md b/vendor/src/github.com/hashicorp/serf/website/source/LICENSE similarity index 100% rename from vendor/src/github.com/hashicorp/serf/website/LICENSE.md rename to vendor/src/github.com/hashicorp/serf/website/source/LICENSE From 1f9e3815aa4ac7eaa707f3a9b436f23c29355a8f Mon Sep 17 00:00:00 2001 From: Alessandro Boch Date: Sun, 8 May 2016 00:32:51 -0700 Subject: [PATCH 2/3] Vendoring libnetwork b66c0385f30c6aa27b2957ed1072682c19a0b0b4 Signed-off-by: Alessandro Boch --- hack/vendor.sh | 2 +- .../github.com/docker/libnetwork/CHANGELOG.md | 12 + .../src/github.com/docker/libnetwork/Makefile | 6 +- .../github.com/docker/libnetwork/README.md | 2 +- .../src/github.com/docker/libnetwork/agent.go | 369 +++++++++++++++ .../docker/libnetwork/bitseq/sequence.go | 2 + .../docker/libnetwork/bitseq/store.go | 5 + .../docker/libnetwork/config/config.go | 24 + .../docker/libnetwork/controller.go | 367 +++++++------- .../docker/libnetwork/datastore/cache.go | 5 +- .../docker/libnetwork/datastore/datastore.go | 17 +- .../libnetwork/default_gateway_linux.go | 2 +- .../libnetwork/default_gateway_solaris.go | 7 + .../docker/libnetwork/driverapi/driverapi.go | 53 ++- .../github.com/docker/libnetwork/drivers.go | 84 ---- .../libnetwork/drivers/bridge/bridge.go | 13 +- .../libnetwork/drivers/bridge/bridge_store.go | 3 +- .../docker/libnetwork/drivers/host/host.go | 13 +- .../libnetwork/drivers/ipvlan/ipvlan.go | 12 + .../drivers/ipvlan/ipvlan_network.go | 2 +- .../libnetwork/drivers/ipvlan/ipvlan_store.go | 3 +- .../libnetwork/drivers/macvlan/macvlan.go | 12 + .../drivers/macvlan/macvlan_network.go | 2 +- .../drivers/macvlan/macvlan_store.go | 3 +- .../docker/libnetwork/drivers/null/null.go | 13 +- .../libnetwork/drivers/overlay/filter.go | 16 +- .../libnetwork/drivers/overlay/joinleave.go | 45 ++ .../libnetwork/drivers/overlay/ov_network.go | 98 +++- .../libnetwork/drivers/overlay/overlay.go | 14 +- 
.../libnetwork/drivers/overlay/peerdb.go | 2 + .../libnetwork/drivers/remote/driver.go | 13 +- .../libnetwork/drivers/windows/windows.go | 26 +- .../docker/libnetwork/drivers_solaris.go | 5 + .../libnetwork/drvregistry/drvregistry.go | 241 ++++++++++ .../github.com/docker/libnetwork/endpoint.go | 42 +- .../docker/libnetwork/endpoint_info.go | 20 + .../docker/libnetwork/ipam/allocator.go | 37 +- .../docker/libnetwork/ipam/store.go | 12 +- .../libnetwork/ipams/builtin/builtin_unix.go | 2 +- .../libnetwork/ipamutils/utils_linux.go | 76 --- .../docker/libnetwork/netlabel/labels.go | 3 + .../docker/libnetwork/netutils/utils.go | 7 - .../{ipamutils => netutils}/utils_freebsd.go | 3 +- .../docker/libnetwork/netutils/utils_linux.go | 67 +++ .../libnetwork/netutils/utils_solaris.go | 32 ++ .../{ipamutils => netutils}/utils_windows.go | 3 +- .../github.com/docker/libnetwork/network.go | 169 +++++-- .../docker/libnetwork/networkdb/broadcast.go | 127 +++++ .../docker/libnetwork/networkdb/cluster.go | 446 ++++++++++++++++++ .../docker/libnetwork/networkdb/delegate.go | 315 +++++++++++++ .../libnetwork/networkdb/event_delegate.go | 23 + .../docker/libnetwork/networkdb/message.go | 122 +++++ .../docker/libnetwork/networkdb/networkdb.go | 424 +++++++++++++++++ .../docker/libnetwork/networkdb/watch.go | 98 ++++ .../libnetwork/osl/interface_solaris.go | 4 + .../docker/libnetwork/osl/neigh_solaris.go | 4 + .../docker/libnetwork/osl/route_linux.go | 2 +- .../libnetwork/resolvconf/resolvconf.go | 12 +- .../github.com/docker/libnetwork/resolver.go | 60 +-- .../docker/libnetwork/resolver_unix.go | 77 +++ .../docker/libnetwork/resolver_windows.go | 7 + .../github.com/docker/libnetwork/sandbox.go | 17 +- .../docker/libnetwork/sandbox_dns_unix.go | 11 +- .../libnetwork/sandbox_externalkey_solaris.go | 45 ++ .../docker/libnetwork/sandbox_store.go | 2 +- .../github.com/docker/libnetwork/service.go | 80 ++++ .../src/github.com/docker/libnetwork/store.go | 21 +- .../docker/libnetwork/types/types.go | 13 + 68 files changed, 3327 insertions(+), 549 deletions(-) create mode 100644 vendor/src/github.com/docker/libnetwork/agent.go create mode 100644 vendor/src/github.com/docker/libnetwork/default_gateway_solaris.go delete mode 100644 vendor/src/github.com/docker/libnetwork/drivers.go create mode 100644 vendor/src/github.com/docker/libnetwork/drivers_solaris.go create mode 100644 vendor/src/github.com/docker/libnetwork/drvregistry/drvregistry.go delete mode 100644 vendor/src/github.com/docker/libnetwork/ipamutils/utils_linux.go rename vendor/src/github.com/docker/libnetwork/{ipamutils => netutils}/utils_freebsd.go (89%) create mode 100644 vendor/src/github.com/docker/libnetwork/netutils/utils_solaris.go rename vendor/src/github.com/docker/libnetwork/{ipamutils => netutils}/utils_windows.go (89%) create mode 100644 vendor/src/github.com/docker/libnetwork/networkdb/broadcast.go create mode 100644 vendor/src/github.com/docker/libnetwork/networkdb/cluster.go create mode 100644 vendor/src/github.com/docker/libnetwork/networkdb/delegate.go create mode 100644 vendor/src/github.com/docker/libnetwork/networkdb/event_delegate.go create mode 100644 vendor/src/github.com/docker/libnetwork/networkdb/message.go create mode 100644 vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go create mode 100644 vendor/src/github.com/docker/libnetwork/networkdb/watch.go create mode 100644 vendor/src/github.com/docker/libnetwork/osl/interface_solaris.go create mode 100644 vendor/src/github.com/docker/libnetwork/osl/neigh_solaris.go create 
mode 100644 vendor/src/github.com/docker/libnetwork/resolver_unix.go create mode 100644 vendor/src/github.com/docker/libnetwork/resolver_windows.go create mode 100644 vendor/src/github.com/docker/libnetwork/sandbox_externalkey_solaris.go create mode 100644 vendor/src/github.com/docker/libnetwork/service.go diff --git a/hack/vendor.sh b/hack/vendor.sh index 3cf528c2f7..b0147f649e 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -29,7 +29,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837 clone git github.com/imdario/mergo 0.2.1 #get libnetwork packages -clone git github.com/docker/libnetwork v0.8.0-dev.1 +clone git github.com/docker/libnetwork b66c0385f30c6aa27b2957ed1072682c19a0b0b4 clone git github.com/docker/go-events 2e7d352816128aa84f4d29b2a21d400133701a0d clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec diff --git a/vendor/src/github.com/docker/libnetwork/CHANGELOG.md b/vendor/src/github.com/docker/libnetwork/CHANGELOG.md index a5ac0aa384..9151c61873 100644 --- a/vendor/src/github.com/docker/libnetwork/CHANGELOG.md +++ b/vendor/src/github.com/docker/libnetwork/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 0.8.0-dev.2 (2016-05-07) +- Fix an issue which may arise during sandbox cleanup (https://github.com/docker/libnetwork/pull/1157) +- Fix cleanup logic in case of ipv6 allocation failure +- Don't add /etc/hosts record if container's ip is empty (--net=none) +- Fix default gw logic for internal networks +- Error when updating IPv6 gateway (https://github.com/docker/libnetwork/issues/1142) +- Fixes https://github.com/docker/libnetwork/issues/1113 +- Fixes https://github.com/docker/libnetwork/issues/1069 +- Fxies https://github.com/docker/libnetwork/issues/1117 +- Increase the concurrent query rate-limit count +- Changes to build libnetwork in Solaris + ## 0.8.0-dev.1 (2016-04-16) - Fixes docker/docker#16964 - Added maximum egress bandwidth qos for Windows diff --git a/vendor/src/github.com/docker/libnetwork/Makefile b/vendor/src/github.com/docker/libnetwork/Makefile index ec40b694f2..6edbf8da12 100644 --- a/vendor/src/github.com/docker/libnetwork/Makefile +++ b/vendor/src/github.com/docker/libnetwork/Makefile @@ -5,8 +5,8 @@ dockerargs = --privileged -v $(shell pwd):/go/src/github.com/docker/libnetwork - container_env = -e "INSIDECONTAINER=-incontainer=true" docker = docker run --rm -it ${dockerargs} $$EXTRA_ARGS ${container_env} ${build_image} ciargs = -e CIRCLECI -e "COVERALLS_TOKEN=$$COVERALLS_TOKEN" -e "INSIDECONTAINER=-incontainer=true" -cidocker = docker run ${dockerargs} ${ciargs} ${container_env} ${build_image} -CROSS_PLATFORMS = linux/amd64 linux/386 linux/arm windows/amd64 windows/386 +cidocker = docker run ${dockerargs} ${ciargs} $$EXTRA_ARGS ${container_env} ${build_image} +CROSS_PLATFORMS = linux/amd64 linux/386 linux/arm windows/amd64 all: ${build_image}.created build check integration-tests clean @@ -102,4 +102,4 @@ circle-ci-check: ${build_image}.created circle-ci-build: ${build_image}.created @${cidocker} make build-local -circle-ci: circle-ci-check circle-ci-build integration-tests +circle-ci: circle-ci-check circle-ci-cross circle-ci-build integration-tests diff --git a/vendor/src/github.com/docker/libnetwork/README.md b/vendor/src/github.com/docker/libnetwork/README.md index 3f10a0311c..536f8aa2b3 100644 --- a/vendor/src/github.com/docker/libnetwork/README.md +++ b/vendor/src/github.com/docker/libnetwork/README.md @@ -34,7 +34,7 
@@ func main() { // Create a network for containers to join. // NewNetwork accepts Variadic optional arguments that libnetwork and Drivers can use. - network, err := controller.NewNetwork(networkType, "network1") + network, err := controller.NewNetwork(networkType, "network1", "") if err != nil { log.Fatalf("controller.NewNetwork: %s", err) } diff --git a/vendor/src/github.com/docker/libnetwork/agent.go b/vendor/src/github.com/docker/libnetwork/agent.go new file mode 100644 index 0000000000..ca54d8c923 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/agent.go @@ -0,0 +1,369 @@ +package libnetwork + +import ( + "fmt" + "net" + "os" + "strings" + + "github.com/Sirupsen/logrus" + "github.com/docker/go-events" + "github.com/docker/libnetwork/datastore" + "github.com/docker/libnetwork/discoverapi" + "github.com/docker/libnetwork/driverapi" + "github.com/docker/libnetwork/networkdb" +) + +type agent struct { + networkDB *networkdb.NetworkDB + bindAddr string + epTblCancel func() + driverCancelFuncs map[string][]func() +} + +func getBindAddr(ifaceName string) (string, error) { + iface, err := net.InterfaceByName(ifaceName) + if err != nil { + return "", fmt.Errorf("failed to find interface %s: %v", ifaceName, err) + } + + addrs, err := iface.Addrs() + if err != nil { + return "", fmt.Errorf("failed to get interface addresses: %v", err) + } + + for _, a := range addrs { + addr, ok := a.(*net.IPNet) + if !ok { + continue + } + addrIP := addr.IP + + if addrIP.IsLinkLocalUnicast() { + continue + } + + return addrIP.String(), nil + } + + return "", fmt.Errorf("failed to get bind address") +} + +func resolveAddr(addrOrInterface string) (string, error) { + // Try and see if this is a valid IP address + if net.ParseIP(addrOrInterface) != nil { + return addrOrInterface, nil + } + + // If not a valid IP address, it should be a valid interface + return getBindAddr(addrOrInterface) +} + +func (c *controller) agentInit(bindAddrOrInterface string) error { + if !c.cfg.Daemon.IsAgent { + return nil + } + + bindAddr, err := resolveAddr(bindAddrOrInterface) + if err != nil { + return err + } + + hostname, _ := os.Hostname() + nDB, err := networkdb.New(&networkdb.Config{ + BindAddr: bindAddr, + NodeName: hostname, + }) + + if err != nil { + return err + } + + ch, cancel := nDB.Watch("endpoint_table", "", "") + + c.agent = &agent{ + networkDB: nDB, + bindAddr: bindAddr, + epTblCancel: cancel, + driverCancelFuncs: make(map[string][]func()), + } + + go c.handleTableEvents(ch, c.handleEpTableEvent) + return nil +} + +func (c *controller) agentJoin(remotes []string) error { + if c.agent == nil { + return nil + } + + return c.agent.networkDB.Join(remotes) +} + +func (c *controller) agentDriverNotify(d driverapi.Driver) { + if c.agent == nil { + return + } + + d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{ + Address: c.agent.bindAddr, + Self: true, + }) +} + +func (c *controller) agentClose() { + if c.agent == nil { + return + } + + for _, cancelFuncs := range c.agent.driverCancelFuncs { + for _, cancel := range cancelFuncs { + cancel() + } + } + c.agent.epTblCancel() + + c.agent.networkDB.Close() +} + +func (n *network) isClusterEligible() bool { + if n.driverScope() != datastore.GlobalScope { + return false + } + + c := n.getController() + if c.agent == nil { + return false + } + + return true +} + +func (n *network) joinCluster() error { + if !n.isClusterEligible() { + return nil + } + + c := n.getController() + return c.agent.networkDB.JoinNetwork(n.ID()) +} + +func (n *network) 
leaveCluster() error { + if !n.isClusterEligible() { + return nil + } + + c := n.getController() + return c.agent.networkDB.LeaveNetwork(n.ID()) +} + +func (ep *endpoint) addToCluster() error { + n := ep.getNetwork() + if !n.isClusterEligible() { + return nil + } + + c := n.getController() + if !ep.isAnonymous() && ep.Iface().Address() != nil { + if err := c.addServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), ep.Iface().Address().IP); err != nil { + return err + } + + if err := c.agent.networkDB.CreateEntry("endpoint_table", n.ID(), ep.ID(), []byte(fmt.Sprintf("%s,%s,%s,%s", ep.Name(), ep.svcName, + ep.svcID, ep.Iface().Address().IP))); err != nil { + return err + } + } + + for _, te := range ep.joinInfo.driverTableEntries { + if err := c.agent.networkDB.CreateEntry(te.tableName, n.ID(), te.key, te.value); err != nil { + return err + } + } + + return nil +} + +func (ep *endpoint) deleteFromCluster() error { + n := ep.getNetwork() + if !n.isClusterEligible() { + return nil + } + + c := n.getController() + if !ep.isAnonymous() { + if ep.Iface().Address() != nil { + if err := c.rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), ep.Iface().Address().IP); err != nil { + return err + } + } + + if err := c.agent.networkDB.DeleteEntry("endpoint_table", n.ID(), ep.ID()); err != nil { + return err + } + } + + if ep.joinInfo == nil { + return nil + } + + for _, te := range ep.joinInfo.driverTableEntries { + if err := c.agent.networkDB.DeleteEntry(te.tableName, n.ID(), te.key); err != nil { + return err + } + } + + return nil +} + +func (n *network) addDriverWatches() { + if !n.isClusterEligible() { + return + } + + c := n.getController() + for _, tableName := range n.driverTables { + ch, cancel := c.agent.networkDB.Watch(tableName, n.ID(), "") + c.Lock() + c.agent.driverCancelFuncs[n.ID()] = append(c.agent.driverCancelFuncs[n.ID()], cancel) + c.Unlock() + + go c.handleTableEvents(ch, n.handleDriverTableEvent) + d, err := n.driver(false) + if err != nil { + logrus.Errorf("Could not resolve driver %s while walking driver tabl: %v", n.networkType, err) + return + } + + c.agent.networkDB.WalkTable(tableName, func(nid, key string, value []byte) bool { + d.EventNotify(driverapi.Create, n.ID(), tableName, key, value) + return false + }) + } +} + +func (n *network) cancelDriverWatches() { + if !n.isClusterEligible() { + return + } + + c := n.getController() + c.Lock() + cancelFuncs := c.agent.driverCancelFuncs[n.ID()] + delete(c.agent.driverCancelFuncs, n.ID()) + c.Unlock() + + for _, cancel := range cancelFuncs { + cancel() + } +} + +func (c *controller) handleTableEvents(ch chan events.Event, fn func(events.Event)) { + for { + select { + case ev, ok := <-ch: + if !ok { + return + } + + fn(ev) + } + } +} + +func (n *network) handleDriverTableEvent(ev events.Event) { + d, err := n.driver(false) + if err != nil { + logrus.Errorf("Could not resolve driver %s while handling driver table event: %v", n.networkType, err) + return + } + + var ( + etype driverapi.EventType + tname string + key string + value []byte + ) + + switch event := ev.(type) { + case networkdb.CreateEvent: + tname = event.Table + key = event.Key + value = event.Value + etype = driverapi.Create + case networkdb.DeleteEvent: + tname = event.Table + key = event.Key + value = event.Value + etype = driverapi.Delete + case networkdb.UpdateEvent: + tname = event.Table + key = event.Key + value = event.Value + etype = driverapi.Delete + } + + d.EventNotify(etype, n.ID(), tname, key, value) +} + +func (c *controller) handleEpTableEvent(ev 
events.Event) { + var ( + nid string + eid string + value string + isAdd bool + ) + + switch event := ev.(type) { + case networkdb.CreateEvent: + nid = event.NetworkID + eid = event.Key + value = string(event.Value) + isAdd = true + case networkdb.DeleteEvent: + nid = event.NetworkID + eid = event.Key + value = string(event.Value) + case networkdb.UpdateEvent: + logrus.Errorf("Unexpected update service table event = %#v", event) + } + + nw, err := c.NetworkByID(nid) + if err != nil { + logrus.Errorf("Could not find network %s while handling service table event: %v", nid, err) + return + } + n := nw.(*network) + + vals := strings.Split(value, ",") + if len(vals) < 4 { + logrus.Errorf("Incorrect service table value = %s", value) + return + } + + name := vals[0] + svcName := vals[1] + svcID := vals[2] + ip := net.ParseIP(vals[3]) + + if name == "" || ip == nil { + logrus.Errorf("Invalid endpoint name/ip received while handling service table event %s", value) + return + } + + if isAdd { + if err := c.addServiceBinding(svcName, svcID, nid, eid, ip); err != nil { + logrus.Errorf("Failed adding service binding for value %s: %v", value, err) + return + } + + n.addSvcRecords(name, ip, nil, true) + } else { + if err := c.rmServiceBinding(svcName, svcID, nid, eid, ip); err != nil { + logrus.Errorf("Failed adding service binding for value %s: %v", value, err) + return + } + + n.deleteSvcRecords(name, ip, nil, true) + } +} diff --git a/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go b/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go index 0dc1bc4ad0..550bcbb825 100644 --- a/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go +++ b/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go @@ -370,6 +370,8 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64 // checks is needed because to cover the case where the number of bits is not a multiple of blockLen func (h *Handle) validateOrdinal(ordinal uint64) error { + h.Lock() + defer h.Unlock() if ordinal >= h.bits { return fmt.Errorf("bit does not belong to the sequence") } diff --git a/vendor/src/github.com/docker/libnetwork/bitseq/store.go b/vendor/src/github.com/docker/libnetwork/bitseq/store.go index df50331227..5448927eb1 100644 --- a/vendor/src/github.com/docker/libnetwork/bitseq/store.go +++ b/vendor/src/github.com/docker/libnetwork/bitseq/store.go @@ -75,6 +75,10 @@ func (h *Handle) CopyTo(o datastore.KVObject) error { defer h.Unlock() dstH := o.(*Handle) + if h == dstH { + return nil + } + dstH.Lock() dstH.bits = h.bits dstH.unselected = h.unselected dstH.head = h.head.getCopy() @@ -83,6 +87,7 @@ func (h *Handle) CopyTo(o datastore.KVObject) error { dstH.dbIndex = h.dbIndex dstH.dbExists = h.dbExists dstH.store = h.store + dstH.Unlock() return nil } diff --git a/vendor/src/github.com/docker/libnetwork/config/config.go b/vendor/src/github.com/docker/libnetwork/config/config.go index 8da92f7a0e..62d9993a90 100644 --- a/vendor/src/github.com/docker/libnetwork/config/config.go +++ b/vendor/src/github.com/docker/libnetwork/config/config.go @@ -22,9 +22,12 @@ type Config struct { // DaemonCfg represents libnetwork core configuration type DaemonCfg struct { Debug bool + IsAgent bool DataDir string DefaultNetwork string DefaultDriver string + Bind string + Neighbors []string Labels []string DriverCfg map[string]interface{} } @@ -81,6 +84,27 @@ func ParseConfigOptions(cfgOptions ...Option) *Config { // to the controller type Option func(c *Config) +// OptionBind function returns an option setter 
for setting a bind interface or address +func OptionBind(bind string) Option { + return func(c *Config) { + c.Daemon.Bind = bind + } +} + +// OptionAgent function returns an option setter for setting agent mode +func OptionAgent() Option { + return func(c *Config) { + c.Daemon.IsAgent = true + } +} + +// OptionNeighbors function returns an option setter for setting a list of neighbors to join. +func OptionNeighbors(neighbors []string) Option { + return func(c *Config) { + c.Daemon.Neighbors = neighbors + } +} + // OptionDefaultNetwork function returns an option setter for a default network func OptionDefaultNetwork(dn string) Option { return func(c *Config) { diff --git a/vendor/src/github.com/docker/libnetwork/controller.go b/vendor/src/github.com/docker/libnetwork/controller.go index 0b5ee8746c..fa14b1cf1f 100644 --- a/vendor/src/github.com/docker/libnetwork/controller.go +++ b/vendor/src/github.com/docker/libnetwork/controller.go @@ -15,7 +15,7 @@ create network namespaces and allocate interfaces for containers to use. // Create a network for containers to join. // NewNetwork accepts Variadic optional arguments that libnetwork and Drivers can make use of - network, err := controller.NewNetwork(networkType, "network1") + network, err := controller.NewNetwork(networkType, "network1", "") if err != nil { return } @@ -58,6 +58,7 @@ import ( "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/discoverapi" "github.com/docker/libnetwork/driverapi" + "github.com/docker/libnetwork/drvregistry" "github.com/docker/libnetwork/hostdiscovery" "github.com/docker/libnetwork/ipamapi" "github.com/docker/libnetwork/netlabel" @@ -75,7 +76,7 @@ type NetworkController interface { Config() config.Config // Create a new network. The options parameter carries network specific options. - NewNetwork(networkType, name string, options ...NetworkOption) (Network, error) + NewNetwork(networkType, name string, id string, options ...NetworkOption) (Network, error) // Networks returns the list of Network(s) managed by this controller. Networks() []Network @@ -119,55 +120,74 @@ type NetworkWalker func(nw Network) bool // When the function returns true, the walk will stop. 
type SandboxWalker func(sb Sandbox) bool -type driverData struct { - driver driverapi.Driver - capability driverapi.Capability -} - -type ipamData struct { - driver ipamapi.Ipam - capability *ipamapi.Capability - // default address spaces are provided by ipam driver at registration time - defaultLocalAddressSpace, defaultGlobalAddressSpace string -} - -type driverTable map[string]*driverData -type ipamTable map[string]*ipamData type sandboxTable map[string]*sandbox type controller struct { - id string - drivers driverTable - ipamDrivers ipamTable - sandboxes sandboxTable - cfg *config.Config - stores []datastore.DataStore - discovery hostdiscovery.HostDiscovery - extKeyListener net.Listener - watchCh chan *endpoint - unWatchCh chan *endpoint - svcDb map[string]svcInfo - nmap map[string]*netWatch - defOsSbox osl.Sandbox - sboxOnce sync.Once + id string + drvRegistry *drvregistry.DrvRegistry + sandboxes sandboxTable + cfg *config.Config + stores []datastore.DataStore + discovery hostdiscovery.HostDiscovery + extKeyListener net.Listener + watchCh chan *endpoint + unWatchCh chan *endpoint + svcRecords map[string]svcInfo + nmap map[string]*netWatch + serviceBindings map[string]*service + defOsSbox osl.Sandbox + sboxOnce sync.Once + agent *agent sync.Mutex } +type initializer struct { + fn drvregistry.InitFunc + ntype string +} + // New creates a new instance of network controller. func New(cfgOptions ...config.Option) (NetworkController, error) { c := &controller{ - id: stringid.GenerateRandomID(), - cfg: config.ParseConfigOptions(cfgOptions...), - sandboxes: sandboxTable{}, - drivers: driverTable{}, - ipamDrivers: ipamTable{}, - svcDb: make(map[string]svcInfo), + id: stringid.GenerateRandomID(), + cfg: config.ParseConfigOptions(cfgOptions...), + sandboxes: sandboxTable{}, + svcRecords: make(map[string]svcInfo), + serviceBindings: make(map[string]*service), + } + + if err := c.agentInit(c.cfg.Daemon.Bind); err != nil { + return nil, err + } + + if err := c.agentJoin(c.cfg.Daemon.Neighbors); err != nil { + return nil, err } if err := c.initStores(); err != nil { return nil, err } + drvRegistry, err := drvregistry.New(c.getStore(datastore.LocalScope), c.getStore(datastore.GlobalScope), c.RegisterDriver, nil) + if err != nil { + return nil, err + } + + for _, i := range getInitializers() { + var dcfg map[string]interface{} + + // External plugins don't need config passed through daemon. They can + // bootstrap themselves + if i.ntype != "remote" { + dcfg = c.makeDriverConfig(i.ntype) + } + + if err := drvRegistry.AddDriver(i.ntype, i.fn, dcfg); err != nil { + return nil, err + } + } + c.drvRegistry = drvRegistry + if c.cfg != nil && c.cfg.Cluster.Watcher != nil { if err := c.initDiscovery(c.cfg.Cluster.Watcher); err != nil { // Failing to initialize discovery is a bad situation to be in. 
@@ -176,15 +196,6 @@ func New(cfgOptions ...config.Option) (NetworkController, error) { } } - if err := initDrivers(c); err != nil { - return nil, err - } - - if err := initIpams(c, c.getStore(datastore.LocalScope), - c.getStore(datastore.GlobalScope)); err != nil { - return nil, err - } - c.sandboxCleanup() c.cleanupLocalEndpoints() c.networkCleanup() @@ -196,8 +207,67 @@ func New(cfgOptions ...config.Option) (NetworkController, error) { return c, nil } +func (c *controller) makeDriverConfig(ntype string) map[string]interface{} { + if c.cfg == nil { + return nil + } + + config := make(map[string]interface{}) + + for _, label := range c.cfg.Daemon.Labels { + if !strings.HasPrefix(netlabel.Key(label), netlabel.DriverPrefix+"."+ntype) { + continue + } + + config[netlabel.Key(label)] = netlabel.Value(label) + } + + drvCfg, ok := c.cfg.Daemon.DriverCfg[ntype] + if ok { + for k, v := range drvCfg.(map[string]interface{}) { + config[k] = v + } + } + + for k, v := range c.cfg.Scopes { + if !v.IsValid() { + continue + } + config[netlabel.MakeKVClient(k)] = discoverapi.DatastoreConfigData{ + Scope: k, + Provider: v.Client.Provider, + Address: v.Client.Address, + Config: v.Client.Config, + } + } + + return config +} + var procReloadConfig = make(chan (bool), 1) +func (c *controller) processAgentConfig(cfg *config.Config) (bool, error) { + if c.cfg.Daemon.IsAgent == cfg.Daemon.IsAgent { + // Agent configuration not changed + return false, nil + } + + c.Lock() + c.cfg = cfg + c.Unlock() + + if err := c.agentInit(c.cfg.Daemon.Bind); err != nil { + return false, err + } + + if err := c.agentJoin(c.cfg.Daemon.Neighbors); err != nil { + c.agentClose() + return false, err + } + + return true, nil +} + func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error { procReloadConfig <- true defer func() { <-procReloadConfig }() @@ -206,6 +276,16 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error { // Refuse the configuration if it alters an existing datastore client configuration. update := false cfg := config.ParseConfigOptions(cfgOptions...) 
+ + isAgentConfig, err := c.processAgentConfig(cfg) + if err != nil { + return err + } + + if isAgentConfig { + return nil + } + for s := range c.cfg.Scopes { if _, ok := cfg.Scopes[s]; !ok { return types.ForbiddenErrorf("cannot accept new configuration because it removes an existing datastore client") @@ -228,16 +308,6 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error { return nil } - c.Lock() - c.cfg = cfg - c.Unlock() - - if c.discovery == nil && c.cfg.Cluster.Watcher != nil { - if err := c.initDiscovery(c.cfg.Cluster.Watcher); err != nil { - log.Errorf("Failed to Initialize Discovery after configuration update: %v", err) - } - } - var dsConfig *discoverapi.DatastoreConfigData for scope, sCfg := range cfg.Scopes { if scope == datastore.LocalScope || !sCfg.IsValid() { @@ -255,17 +325,25 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error { return nil } - for nm, id := range c.getIpamDrivers() { - err := id.driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig) + c.drvRegistry.WalkIPAMs(func(name string, driver ipamapi.Ipam, cap *ipamapi.Capability) bool { + err := driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig) if err != nil { - log.Errorf("Failed to set datastore in driver %s: %v", nm, err) + log.Errorf("Failed to set datastore in driver %s: %v", name, err) } - } + return false + }) - for nm, id := range c.getNetDrivers() { - err := id.driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig) + c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool { + err := driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig) if err != nil { - log.Errorf("Failed to set datastore in driver %s: %v", nm, err) + log.Errorf("Failed to set datastore in driver %s: %v", name, err) + } + return false + }) + + if c.discovery == nil && c.cfg.Cluster.Watcher != nil { + if err := c.initDiscovery(c.cfg.Cluster.Watcher); err != nil { + log.Errorf("Failed to Initialize Discovery after configuration update: %v", err) } } @@ -333,34 +411,30 @@ func (c *controller) hostLeaveCallback(nodes []net.IP) { } func (c *controller) processNodeDiscovery(nodes []net.IP, add bool) { - c.Lock() - drivers := []*driverData{} - for _, d := range c.drivers { - drivers = append(drivers, d) - } - c.Unlock() - - for _, d := range drivers { - c.pushNodeDiscovery(d, nodes, add) - } + c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool { + c.pushNodeDiscovery(driver, capability, nodes, add) + return false + }) } -func (c *controller) pushNodeDiscovery(d *driverData, nodes []net.IP, add bool) { +func (c *controller) pushNodeDiscovery(d driverapi.Driver, cap driverapi.Capability, nodes []net.IP, add bool) { var self net.IP if c.cfg != nil { addr := strings.Split(c.cfg.Cluster.Address, ":") self = net.ParseIP(addr[0]) } - if d == nil || d.capability.DataScope != datastore.GlobalScope || nodes == nil { + + if d == nil || cap.DataScope != datastore.GlobalScope || nodes == nil { return } + for _, node := range nodes { nodeData := discoverapi.NodeDiscoveryData{Address: node.String(), Self: node.Equal(self)} var err error if add { - err = d.driver.DiscoverNew(discoverapi.NodeDiscovery, nodeData) + err = d.DiscoverNew(discoverapi.NodeDiscovery, nodeData) } else { - err = d.driver.DiscoverDelete(discoverapi.NodeDiscovery, nodeData) + err = d.DiscoverDelete(discoverapi.NodeDiscovery, nodeData) } if err != nil { log.Debugf("discovery notification error : %v", err) @@ 
-378,73 +452,36 @@ func (c *controller) Config() config.Config { } func (c *controller) RegisterDriver(networkType string, driver driverapi.Driver, capability driverapi.Capability) error { - if !config.IsValidName(networkType) { - return ErrInvalidName(networkType) - } - c.Lock() - if _, ok := c.drivers[networkType]; ok { - c.Unlock() - return driverapi.ErrActiveRegistration(networkType) - } - dData := &driverData{driver, capability} - c.drivers[networkType] = dData hd := c.discovery c.Unlock() if hd != nil { - c.pushNodeDiscovery(dData, hd.Fetch(), true) + c.pushNodeDiscovery(driver, capability, hd.Fetch(), true) } + c.agentDriverNotify(driver) return nil } -func (c *controller) registerIpamDriver(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error { - if !config.IsValidName(name) { - return ErrInvalidName(name) - } - - c.Lock() - _, ok := c.ipamDrivers[name] - c.Unlock() - if ok { - return types.ForbiddenErrorf("ipam driver %q already registered", name) - } - locAS, glbAS, err := driver.GetDefaultAddressSpaces() - if err != nil { - return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err) - } - c.Lock() - c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS, capability: caps} - c.Unlock() - - log.Debugf("Registering ipam driver: %q", name) - - return nil -} - -func (c *controller) RegisterIpamDriver(name string, driver ipamapi.Ipam) error { - return c.registerIpamDriver(name, driver, &ipamapi.Capability{}) -} - -func (c *controller) RegisterIpamDriverWithCapabilities(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error { - return c.registerIpamDriver(name, driver, caps) -} - // NewNetwork creates a new network of the specified network type. The options // are network specific and modeled in a generic way. 
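The node-discovery and datastore-reload paths above now iterate drivers through the registry walkers instead of copying the old driver tables; a walk callback returns false to continue and true to stop early. A minimal, hypothetical use of the same pattern (collectDriverNames is an assumption, not part of the patch):

import (
	"github.com/docker/libnetwork/driverapi"
	"github.com/docker/libnetwork/drvregistry"
)

// collectDriverNames is a hypothetical helper that records the name of every
// registered network driver, never stopping the walk early.
func collectDriverNames(r *drvregistry.DrvRegistry) []string {
	var names []string
	r.WalkDrivers(func(name string, _ driverapi.Driver, _ driverapi.Capability) bool {
		names = append(names, name)
		return false // false keeps the walk going
	})
	return names
}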
-func (c *controller) NewNetwork(networkType, name string, options ...NetworkOption) (Network, error) { +func (c *controller) NewNetwork(networkType, name string, id string, options ...NetworkOption) (Network, error) { if !config.IsValidName(name) { return nil, ErrInvalidName(name) } + if id == "" { + id = stringid.GenerateRandomID() + } + // Construct the network object network := &network{ name: name, networkType: networkType, generic: map[string]interface{}{netlabel.GenericData: make(map[string]string)}, ipamType: ipamapi.DefaultIPAM, - id: stringid.GenerateRandomID(), + id: id, ctrlr: c, persist: true, drvOnce: &sync.Once{}, @@ -468,7 +505,8 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti } }() - if err = c.addNetwork(network); err != nil { + err = c.addNetwork(network) + if err != nil { return nil, err } defer func() { @@ -499,6 +537,12 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti return nil, err } + if err = network.joinCluster(); err != nil { + log.Errorf("Failed to join network %s into agent cluster: %v", name, err) + } + + network.addDriverWatches() + return network, nil } @@ -509,7 +553,7 @@ func (c *controller) addNetwork(n *network) error { } // Create the network - if err := d.CreateNetwork(n.id, n.generic, n.getIPData(4), n.getIPData(6)); err != nil { + if err := d.CreateNetwork(n.id, n.generic, n, n.getIPData(4), n.getIPData(6)); err != nil { return err } @@ -745,78 +789,47 @@ func SandboxKeyWalker(out *Sandbox, key string) SandboxWalker { } } -func (c *controller) loadDriver(networkType string) (*driverData, error) { +func (c *controller) loadDriver(networkType string) error { // Plugins pkg performs lazy loading of plugins that acts as remote drivers. // As per the design, this Get call will result in remote driver discovery if there is a corresponding plugin available. _, err := plugins.Get(networkType, driverapi.NetworkPluginEndpointType) if err != nil { if err == plugins.ErrNotFound { - return nil, types.NotFoundErrorf(err.Error()) + return types.NotFoundErrorf(err.Error()) } - return nil, err + return err } - c.Lock() - defer c.Unlock() - dd, ok := c.drivers[networkType] - if !ok { - return nil, ErrInvalidNetworkDriver(networkType) - } - return dd, nil + + return nil } -func (c *controller) loadIpamDriver(name string) (*ipamData, error) { +func (c *controller) loadIPAMDriver(name string) error { if _, err := plugins.Get(name, ipamapi.PluginEndpointType); err != nil { if err == plugins.ErrNotFound { - return nil, types.NotFoundErrorf(err.Error()) + return types.NotFoundErrorf(err.Error()) } - return nil, err + return err } - c.Lock() - id, ok := c.ipamDrivers[name] - c.Unlock() - if !ok { - return nil, types.BadRequestErrorf("invalid ipam driver: %q", name) - } - return id, nil + + return nil } -func (c *controller) getIPAM(name string) (id *ipamData, err error) { - var ok bool - c.Lock() - id, ok = c.ipamDrivers[name] - c.Unlock() - if !ok { - id, err = c.loadIpamDriver(name) - } - return id, err -} +func (c *controller) getIPAMDriver(name string) (ipamapi.Ipam, *ipamapi.Capability, error) { + id, cap := c.drvRegistry.IPAM(name) + if id == nil { + // Might be a plugin name. 
Try loading it + if err := c.loadIPAMDriver(name); err != nil { + return nil, nil, err + } -func (c *controller) getIpamDriver(name string) (ipamapi.Ipam, error) { - id, err := c.getIPAM(name) - if err != nil { - return nil, err + // Now that we resolved the plugin, try again looking up the registry + id, cap = c.drvRegistry.IPAM(name) + if id == nil { + return nil, nil, types.BadRequestErrorf("invalid ipam driver: %q", name) + } } - return id.driver, nil -} -func (c *controller) getIpamDrivers() ipamTable { - c.Lock() - defer c.Unlock() - table := ipamTable{} - for i, d := range c.ipamDrivers { - table[i] = d - } - return table -} - -func (c *controller) getNetDrivers() driverTable { - c.Lock() - defer c.Unlock() - table := driverTable{} - for i, d := range c.drivers { - table[i] = d - } - return table + return id, cap, nil } func (c *controller) Stop() { diff --git a/vendor/src/github.com/docker/libnetwork/datastore/cache.go b/vendor/src/github.com/docker/libnetwork/datastore/cache.go index 08c8ac4839..2d00038290 100644 --- a/vendor/src/github.com/docker/libnetwork/datastore/cache.go +++ b/vendor/src/github.com/docker/libnetwork/datastore/cache.go @@ -5,7 +5,6 @@ import ( "sync" "github.com/docker/libkv/store" - "github.com/docker/libkv/store/boltdb" ) type kvMap map[string]KVObject @@ -42,9 +41,7 @@ func (c *cache) kmap(kvObject KVObject) (kvMap, error) { kvList, err := c.ds.store.List(keyPrefix) if err != nil { - // In case of BoltDB it may return ErrBoltBucketNotFound when no writes - // have ever happened on the db bucket. So check for both err codes - if err == store.ErrKeyNotFound || err == boltdb.ErrBoltBucketNotFound { + if err == store.ErrKeyNotFound { // If the store doesn't have anything then there is nothing to // populate in the cache. Just bail out. goto out diff --git a/vendor/src/github.com/docker/libnetwork/datastore/datastore.go b/vendor/src/github.com/docker/libnetwork/datastore/datastore.go index c15cd620c1..49affc7883 100644 --- a/vendor/src/github.com/docker/libnetwork/datastore/datastore.go +++ b/vendor/src/github.com/docker/libnetwork/datastore/datastore.go @@ -9,10 +9,6 @@ import ( "github.com/docker/libkv" "github.com/docker/libkv/store" - "github.com/docker/libkv/store/boltdb" - "github.com/docker/libkv/store/consul" - "github.com/docker/libkv/store/etcd" - "github.com/docker/libkv/store/zookeeper" "github.com/docker/libnetwork/discoverapi" "github.com/docker/libnetwork/types" ) @@ -148,13 +144,6 @@ func makeDefaultScopes() map[string]*ScopeCfg { var defaultRootChain = []string{"docker", "network", "v1.0"} var rootChain = defaultRootChain -func init() { - consul.Register() - zookeeper.Register() - etcd.Register() - boltdb.Register() -} - // DefaultScopes returns a map of default scopes and it's config for clients to use. 
func DefaultScopes(dataDir string) map[string]*ScopeCfg { if dataDir != "" { @@ -411,6 +400,9 @@ func (ds *datastore) PutObjectAtomic(kvObject KVObject) error { _, pair, err = ds.store.AtomicPut(Key(kvObject.Key()...), kvObjValue, previous, nil) if err != nil { + if err == store.ErrKeyExists { + return ErrKeyModified + } return err } @@ -571,6 +563,9 @@ func (ds *datastore) DeleteObjectAtomic(kvObject KVObject) error { } if _, err := ds.store.AtomicDelete(Key(kvObject.Key()...), previous); err != nil { + if err == store.ErrKeyExists { + return ErrKeyModified + } return err } diff --git a/vendor/src/github.com/docker/libnetwork/default_gateway_linux.go b/vendor/src/github.com/docker/libnetwork/default_gateway_linux.go index 9376922a21..c08b061898 100644 --- a/vendor/src/github.com/docker/libnetwork/default_gateway_linux.go +++ b/vendor/src/github.com/docker/libnetwork/default_gateway_linux.go @@ -14,7 +14,7 @@ func (c *controller) createGWNetwork() (Network, error) { bridge.EnableIPMasquerade: strconv.FormatBool(true), } - n, err := c.NewNetwork("bridge", libnGWNetwork, + n, err := c.NewNetwork("bridge", libnGWNetwork, "", NetworkOptionDriverOpts(netOption), NetworkOptionEnableIPv6(false), ) diff --git a/vendor/src/github.com/docker/libnetwork/default_gateway_solaris.go b/vendor/src/github.com/docker/libnetwork/default_gateway_solaris.go new file mode 100644 index 0000000000..104781aa34 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/default_gateway_solaris.go @@ -0,0 +1,7 @@ +package libnetwork + +import "github.com/docker/libnetwork/types" + +func (c *controller) createGWNetwork() (Network, error) { + return nil, types.NotImplementedErrorf("default gateway functionality is not implemented in solaris") +} diff --git a/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go b/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go index 4ea5e11278..f555246ceb 100644 --- a/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go +++ b/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go @@ -13,10 +13,25 @@ const NetworkPluginEndpointType = "NetworkDriver" type Driver interface { discoverapi.Discover - // CreateNetwork invokes the driver method to create a network passing - // the network id and network specific config. The config mechanism will - // eventually be replaced with labels which are yet to be introduced. - CreateNetwork(nid string, options map[string]interface{}, ipV4Data, ipV6Data []IPAMData) error + // NetworkAllocate invokes the driver method to allocate network + // specific resources passing network id and network specific config. + // It returns a key,value pair of network specific driver allocations + // to the caller. + NetworkAllocate(nid string, options map[string]string, ipV4Data, ipV6Data []IPAMData) (map[string]string, error) + + // NetworkFree invokes the driver method to free network specific resources + // associated with a given network id. + NetworkFree(nid string) error + + // CreateNetwork invokes the driver method to create a network + // passing the network id and network specific config. The + // config mechanism will eventually be replaced with labels + // which are yet to be introduced. The driver can return a + // list of table names for which it is interested in receiving + // notification when a CRUD operation is performed on any + // entry in that table. This will be ignored for local scope + // drivers. 
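With store.ErrKeyExists now surfaced as ErrKeyModified by the atomic datastore operations above, callers can treat a lost write race uniformly. A hedged sketch of the usual re-read-and-retry loop (putWithRetry is hypothetical, not part of the patch):

import "github.com/docker/libnetwork/datastore"

// putWithRetry is a hypothetical helper: it retries an atomic put whenever the
// object was modified concurrently, refreshing the local copy before retrying.
func putWithRetry(ds datastore.DataStore, obj datastore.KVObject) error {
	for {
		err := ds.PutObjectAtomic(obj)
		if err == nil {
			return nil
		}
		if err != datastore.ErrKeyModified {
			return err
		}
		// Another writer won the race: reload the latest version and try again.
		if err := ds.GetObject(datastore.Key(obj.Key()...), obj); err != nil {
			return err
		}
	}
}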
+ CreateNetwork(nid string, options map[string]interface{}, nInfo NetworkInfo, ipV4Data, ipV6Data []IPAMData) error // DeleteNetwork invokes the driver method to delete network passing // the network id. @@ -50,10 +65,24 @@ type Driver interface { // programming that was done so far RevokeExternalConnectivity(nid, eid string) error + // EventNotify notifies the driver when a CRUD operation has + // happened on a table of its interest as soon as this node + // receives such an event in the gossip layer. This method is + // only invoked for the global scope driver. + EventNotify(event EventType, nid string, tableName string, key string, value []byte) + // Type returns the the type of this driver, the network type this driver manages Type() string } +// NetworkInfo provides a go interface for drivers to provide network +// specific information to libnetwork. +type NetworkInfo interface { + // TableEventRegister registers driver interest in a given + // table name. + TableEventRegister(tableName string) error +} + // InterfaceInfo provides a go interface for drivers to retrive // network information to interface resources. type InterfaceInfo interface { @@ -102,6 +131,10 @@ type JoinInfo interface { // DisableGatewayService tells libnetwork not to provide Default GW for the container DisableGatewayService() + + // AddTableEntry adds a table entry to the gossip layer + // passing the table name, key and an opaque value. + AddTableEntry(tableName string, key string, value []byte) error } // DriverCallback provides a Callback interface for Drivers into LibNetwork @@ -124,3 +157,15 @@ type IPAMData struct { Gateway *net.IPNet AuxAddresses map[string]*net.IPNet } + +// EventType defines a type for the CRUD event +type EventType uint8 + +const ( + // Create event is generated when a table entry is created, + Create EventType = 1 + iota + // Update event is generated when a table entry is updated. + Update + // Delete event is generated when a table entry is deleted. 
+ Delete +) diff --git a/vendor/src/github.com/docker/libnetwork/drivers.go b/vendor/src/github.com/docker/libnetwork/drivers.go deleted file mode 100644 index 566d330ff4..0000000000 --- a/vendor/src/github.com/docker/libnetwork/drivers.go +++ /dev/null @@ -1,84 +0,0 @@ -package libnetwork - -import ( - "strings" - - "github.com/docker/libnetwork/discoverapi" - "github.com/docker/libnetwork/driverapi" - "github.com/docker/libnetwork/ipamapi" - "github.com/docker/libnetwork/netlabel" - - builtinIpam "github.com/docker/libnetwork/ipams/builtin" - nullIpam "github.com/docker/libnetwork/ipams/null" - remoteIpam "github.com/docker/libnetwork/ipams/remote" -) - -type initializer struct { - fn func(driverapi.DriverCallback, map[string]interface{}) error - ntype string -} - -func initDrivers(c *controller) error { - for _, i := range getInitializers() { - if err := i.fn(c, makeDriverConfig(c, i.ntype)); err != nil { - return err - } - } - - return nil -} - -func makeDriverConfig(c *controller, ntype string) map[string]interface{} { - if c.cfg == nil { - return nil - } - - config := make(map[string]interface{}) - - for _, label := range c.cfg.Daemon.Labels { - if !strings.HasPrefix(netlabel.Key(label), netlabel.DriverPrefix+"."+ntype) { - continue - } - - config[netlabel.Key(label)] = netlabel.Value(label) - } - - drvCfg, ok := c.cfg.Daemon.DriverCfg[ntype] - if ok { - for k, v := range drvCfg.(map[string]interface{}) { - config[k] = v - } - } - - // We don't send datastore configs to external plugins - if ntype == "remote" { - return config - } - - for k, v := range c.cfg.Scopes { - if !v.IsValid() { - continue - } - config[netlabel.MakeKVClient(k)] = discoverapi.DatastoreConfigData{ - Scope: k, - Provider: v.Client.Provider, - Address: v.Client.Address, - Config: v.Client.Config, - } - } - - return config -} - -func initIpams(ic ipamapi.Callback, lDs, gDs interface{}) error { - for _, fn := range [](func(ipamapi.Callback, interface{}, interface{}) error){ - builtinIpam.Init, - remoteIpam.Init, - nullIpam.Init, - } { - if err := fn(ic, lDs, gDs); err != nil { - return err - } - } - return nil -} diff --git a/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge.go b/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge.go index a2ec5ce8dc..baa38db5a4 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge.go @@ -535,8 +535,19 @@ func (d *driver) getNetworks() []*bridgeNetwork { return ls } +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { +} + // Create a new network using bridge plugin -func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" { return types.BadRequestErrorf("ipv4 pool is empty") } diff --git a/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge_store.go b/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge_store.go 
index eca72bd30b..de9635289a 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge_store.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/bridge/bridge_store.go @@ -6,7 +6,6 @@ import ( "net" "github.com/Sirupsen/logrus" - "github.com/docker/libkv/store/boltdb" "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/discoverapi" "github.com/docker/libnetwork/netlabel" @@ -35,7 +34,7 @@ func (d *driver) initStore(option map[string]interface{}) error { func (d *driver) populateNetworks() error { kvol, err := d.store.List(datastore.Key(bridgePrefix), &networkConfiguration{}) - if err != nil && err != datastore.ErrKeyNotFound && err != boltdb.ErrBoltBucketNotFound { + if err != nil && err != datastore.ErrKeyNotFound { return fmt.Errorf("failed to get bridge network configurations from store: %v", err) } diff --git a/vendor/src/github.com/docker/libnetwork/drivers/host/host.go b/vendor/src/github.com/docker/libnetwork/drivers/host/host.go index bbf59c204c..bec64465a0 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/host/host.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/host/host.go @@ -24,7 +24,18 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error { return dc.RegisterDriver(networkType, &driver{}, c) } -func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { +} + +func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { d.Lock() defer d.Unlock() diff --git a/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan.go b/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan.go index a8935710bf..8ea44fcbb4 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan.go @@ -8,6 +8,7 @@ import ( "github.com/docker/libnetwork/discoverapi" "github.com/docker/libnetwork/driverapi" "github.com/docker/libnetwork/osl" + "github.com/docker/libnetwork/types" ) const ( @@ -64,6 +65,14 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error { return dc.RegisterDriver(ipvlanType, d, c) } +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + func (d *driver) EndpointOperInfo(nid, eid string) (map[string]interface{}, error) { return make(map[string]interface{}, 0), nil } @@ -89,3 +98,6 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{}) func (d *driver) DiscoverDelete(dType discoverapi.DiscoveryType, data interface{}) error { return nil } + +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { +} diff --git a/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_network.go 
b/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_network.go index f2f307daa3..7a58a382d0 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_network.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_network.go @@ -14,7 +14,7 @@ import ( ) // CreateNetwork the network for the specified driver type -func (d *driver) CreateNetwork(nid string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) CreateNetwork(nid string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { defer osl.InitOSContext()() kv, err := kernel.GetKernelVersion() if err != nil { diff --git a/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_store.go b/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_store.go index f6746da670..c6430835ae 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_store.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/ipvlan/ipvlan_store.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/Sirupsen/logrus" - "github.com/docker/libkv/store/boltdb" "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/discoverapi" "github.com/docker/libnetwork/netlabel" @@ -60,7 +59,7 @@ func (d *driver) initStore(option map[string]interface{}) error { // populateNetworks is invoked at driver init to recreate persistently stored networks func (d *driver) populateNetworks() error { kvol, err := d.store.List(datastore.Key(ipvlanPrefix), &configuration{}) - if err != nil && err != datastore.ErrKeyNotFound && err != boltdb.ErrBoltBucketNotFound { + if err != nil && err != datastore.ErrKeyNotFound { return fmt.Errorf("failed to get ipvlan network configurations from store: %v", err) } // If empty it simply means no ipvlan networks have been created yet diff --git a/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan.go b/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan.go index 728ab0ead3..5ace97f90c 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan.go @@ -8,6 +8,7 @@ import ( "github.com/docker/libnetwork/discoverapi" "github.com/docker/libnetwork/driverapi" "github.com/docker/libnetwork/osl" + "github.com/docker/libnetwork/types" ) const ( @@ -66,6 +67,14 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error { return dc.RegisterDriver(macvlanType, d, c) } +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + func (d *driver) EndpointOperInfo(nid, eid string) (map[string]interface{}, error) { return make(map[string]interface{}, 0), nil } @@ -91,3 +100,6 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{}) func (d *driver) DiscoverDelete(dType discoverapi.DiscoveryType, data interface{}) error { return nil } + +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { +} diff --git a/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_network.go b/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_network.go index 78f906fbf5..071fe349f3 100644 --- 
a/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_network.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_network.go @@ -14,7 +14,7 @@ import ( ) // CreateNetwork the network for the specified driver type -func (d *driver) CreateNetwork(nid string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) CreateNetwork(nid string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { defer osl.InitOSContext()() kv, err := kernel.GetKernelVersion() if err != nil { diff --git a/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_store.go b/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_store.go index 492ea93f71..5f92feadd4 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_store.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/macvlan/macvlan_store.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/Sirupsen/logrus" - "github.com/docker/libkv/store/boltdb" "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/discoverapi" "github.com/docker/libnetwork/netlabel" @@ -60,7 +59,7 @@ func (d *driver) initStore(option map[string]interface{}) error { // populateNetworks is invoked at driver init to recreate persistently stored networks func (d *driver) populateNetworks() error { kvol, err := d.store.List(datastore.Key(macvlanPrefix), &configuration{}) - if err != nil && err != datastore.ErrKeyNotFound && err != boltdb.ErrBoltBucketNotFound { + if err != nil && err != datastore.ErrKeyNotFound { return fmt.Errorf("failed to get macvlan network configurations from store: %v", err) } // If empty it simply means no macvlan networks have been created yet diff --git a/vendor/src/github.com/docker/libnetwork/drivers/null/null.go b/vendor/src/github.com/docker/libnetwork/drivers/null/null.go index ecc64d2db3..a137b000fa 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/null/null.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/null/null.go @@ -24,7 +24,18 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error { return dc.RegisterDriver(networkType, &driver{}, c) } -func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { +} + +func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { d.Lock() defer d.Unlock() diff --git a/vendor/src/github.com/docker/libnetwork/drivers/overlay/filter.go b/vendor/src/github.com/docker/libnetwork/drivers/overlay/filter.go index 0a69c6715b..2bf76b33a5 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/overlay/filter.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/overlay/filter.go @@ -21,14 +21,18 @@ func chainExists(cname string) bool { } func setupGlobalChain() { - if err := iptables.RawCombinedOutput("-N", globalChain); err != nil { - logrus.Errorf("could not create global overlay chain: %v", err) - return + // Because of an ungraceful 
shutdown, chain could already be present + if !chainExists(globalChain) { + if err := iptables.RawCombinedOutput("-N", globalChain); err != nil { + logrus.Errorf("could not create global overlay chain: %v", err) + return + } } - if err := iptables.RawCombinedOutput("-A", globalChain, "-j", "RETURN"); err != nil { - logrus.Errorf("could not install default return chain in the overlay global chain: %v", err) - return + if !iptables.Exists(iptables.Filter, globalChain, "-j", "RETURN") { + if err := iptables.RawCombinedOutput("-A", globalChain, "-j", "RETURN"); err != nil { + logrus.Errorf("could not install default return chain in the overlay global chain: %v", err) + } } } diff --git a/vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go b/vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go index f9567d7dad..46efd3f051 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go @@ -3,6 +3,7 @@ package overlay import ( "fmt" "net" + "strings" log "github.com/Sirupsen/logrus" "github.com/docker/libnetwork/driverapi" @@ -104,11 +105,55 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, d.peerDbAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.bindAddress), true) + + if err := jinfo.AddTableEntry(ovPeerTable, eid, []byte(fmt.Sprintf("%s,%s,%s", ep.addr, ep.mac, d.bindAddress))); err != nil { + log.Errorf("overlay: Failed adding table entry to joininfo: %v", err) + } + d.pushLocalEndpointEvent("join", nid, eid) return nil } +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { + if tableName != ovPeerTable { + log.Errorf("Unexpected table notification for table %s received", tableName) + return + } + + eid := key + values := strings.Split(string(value), ",") + if len(values) < 3 { + log.Errorf("Invalid value %s received through event notify", string(value)) + return + } + + addr, err := types.ParseCIDR(values[0]) + if err != nil { + log.Errorf("Invalid peer IP %s received in event notify", values[0]) + return + } + + mac, err := net.ParseMAC(values[1]) + if err != nil { + log.Errorf("Invalid mac %s received in event notify", values[1]) + return + } + + vtep := net.ParseIP(values[2]) + if vtep == nil { + log.Errorf("Invalid VTEP %s received in event notify", values[2]) + return + } + + if etype == driverapi.Delete { + d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, true) + return + } + + d.peerAdd(nid, eid, addr.IP, addr.Mask, mac, vtep, true) +} + // Leave method is invoked when a Sandbox detaches from an endpoint. 
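The overlay hunks above show the full gossip-table round trip: CreateNetwork registers interest in a table, Join publishes a peer record with AddTableEntry, and remote nodes program state when EventNotify delivers it. A condensed, hypothetical sketch of the same flow for a driver-defined table (demoDriver, demoTable and the record contents are assumptions; the remaining driverapi.Driver methods are omitted):

import "github.com/docker/libnetwork/driverapi"

const demoTable = "demo_table"

type demoDriver struct{}

func (d *demoDriver) CreateNetwork(nid string, opts map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
	// Ask libnetwork to deliver gossip events for this table on the network.
	if nInfo != nil {
		return nInfo.TableEventRegister(demoTable)
	}
	return nil
}

func (d *demoDriver) Join(nid, eid, sboxKey string, jinfo driverapi.JoinInfo, options map[string]interface{}) error {
	// Publish an opaque record for this endpoint to the rest of the cluster.
	return jinfo.AddTableEntry(demoTable, eid, []byte("endpoint state"))
}

func (d *demoDriver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
	if tableName != demoTable {
		return
	}
	switch etype {
	case driverapi.Create, driverapi.Update:
		// program local state for key/value
	case driverapi.Delete:
		// tear down local state for key
	}
}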
func (d *driver) Leave(nid, eid string) error { if err := validateID(nid, eid); err != nil { diff --git a/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_network.go b/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_network.go index 18e527a2ba..893f8da314 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_network.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_network.go @@ -6,6 +6,7 @@ import ( "net" "os" "path/filepath" + "strconv" "strings" "sync" "syscall" @@ -13,6 +14,7 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/driverapi" + "github.com/docker/libnetwork/netlabel" "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/osl" "github.com/docker/libnetwork/resolvconf" @@ -59,7 +61,15 @@ type network struct { sync.Mutex } -func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + +func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { if id == "" { return fmt.Errorf("invalid network id") } @@ -81,12 +91,40 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat subnets: []*subnet{}, } - for _, ipd := range ipV4Data { + vnis := make([]uint32, 0, len(ipV4Data)) + if gval, ok := option[netlabel.GenericData]; ok { + optMap := gval.(map[string]string) + if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok { + logrus.Debugf("overlay: Received vxlan IDs: %s", val) + vniStrings := strings.Split(val, ",") + for _, vniStr := range vniStrings { + vni, err := strconv.Atoi(vniStr) + if err != nil { + return fmt.Errorf("invalid vxlan id value %q passed", vniStr) + } + + vnis = append(vnis, uint32(vni)) + } + } + } + + // If we are getting vnis from libnetwork, either we get for + // all subnets or none. 
+ if len(vnis) != 0 && len(vnis) < len(ipV4Data) { + return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis)) + } + + for i, ipd := range ipV4Data { s := &subnet{ subnetIP: ipd.Pool, gwIP: ipd.Gateway, once: &sync.Once{}, } + + if len(vnis) != 0 { + s.vni = vnis[i] + } + n.subnets = append(n.subnets, s) } @@ -94,8 +132,13 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat return fmt.Errorf("failed to update data store for network %v: %v", n.id, err) } - d.addNetwork(n) + if nInfo != nil { + if err := nInfo.TableEventRegister(ovPeerTable); err != nil { + return err + } + } + d.addNetwork(n) return nil } @@ -244,11 +287,21 @@ func setHostMode() { } func (n *network) generateVxlanName(s *subnet) string { - return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5] + id := n.id + if len(n.id) > 5 { + id = n.id[:5] + } + + return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + id } func (n *network) generateBridgeName(s *subnet) string { - return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5] + id := n.id + if len(n.id) > 5 { + id = n.id[:5] + } + + return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + id } func isOverlap(nw *net.IPNet) bool { @@ -395,9 +448,10 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { continue } - if neigh.IP.To16() != nil { + if neigh.IP.To4() == nil { continue } + logrus.Debugf("miss notification for dest IP, %v", neigh.IP.String()) if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { continue @@ -575,32 +629,38 @@ func (n *network) DataScope() string { } func (n *network) writeToStore() error { + if n.driver.store == nil { + return nil + } + return n.driver.store.PutObjectAtomic(n) } func (n *network) releaseVxlanID() error { - if n.driver.store == nil { - return fmt.Errorf("no datastore configured. cannot release vxlan id") - } - if len(n.subnets) == 0 { return nil } - if err := n.driver.store.DeleteObjectAtomic(n); err != nil { - if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound { - // In both the above cases we can safely assume that the key has been removed by some other - // instance and so simply get out of here - return nil - } + if n.driver.store != nil { + if err := n.driver.store.DeleteObjectAtomic(n); err != nil { + if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound { + // In both the above cases we can safely assume that the key has been removed by some other + // instance and so simply get out of here + return nil + } - return fmt.Errorf("failed to delete network to vxlan id map: %v", err) + return fmt.Errorf("failed to delete network to vxlan id map: %v", err) + } } for _, s := range n.subnets { - n.driver.vxlanIdm.Release(uint64(n.vxlanID(s))) + if n.driver.vxlanIdm != nil { + n.driver.vxlanIdm.Release(uint64(n.vxlanID(s))) + } + n.setVxlanID(s, 0) } + return nil } @@ -611,7 +671,7 @@ func (n *network) obtainVxlanID(s *subnet) error { } if n.driver.store == nil { - return fmt.Errorf("no datastore configured. 
cannot obtain vxlan id") + return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id") } for { diff --git a/vendor/src/github.com/docker/libnetwork/drivers/overlay/overlay.go b/vendor/src/github.com/docker/libnetwork/drivers/overlay/overlay.go index 80fc19b7e4..e4d487adaa 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/overlay/overlay.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/overlay/overlay.go @@ -88,7 +88,7 @@ func Fini(drv driverapi.Driver) { func (d *driver) configure() error { if d.store == nil { - return types.NoServiceErrorf("datastore is not available") + return nil } if d.vxlanIdm == nil { @@ -147,10 +147,14 @@ func (d *driver) nodeJoin(node string, self bool) { d.Lock() d.bindAddress = node d.Unlock() - err := d.serfInit() - if err != nil { - logrus.Errorf("initializing serf instance failed: %v", err) - return + + // If there is no cluster store there is no need to start serf. + if d.store != nil { + err := d.serfInit() + if err != nil { + logrus.Errorf("initializing serf instance failed: %v", err) + return + } } } diff --git a/vendor/src/github.com/docker/libnetwork/drivers/overlay/peerdb.go b/vendor/src/github.com/docker/libnetwork/drivers/overlay/peerdb.go index c820da9f05..3676136434 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/overlay/peerdb.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/overlay/peerdb.go @@ -7,6 +7,8 @@ import ( "syscall" ) +const ovPeerTable = "overlay_peer_table" + type peerKey struct { peerIP net.IP peerMac net.HardwareAddr diff --git a/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go b/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go index e3f2cd58e2..5383d9ee7a 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go @@ -83,7 +83,18 @@ func (d *driver) call(methodName string, arg interface{}, retVal maybeError) err return nil } -func (d *driver) CreateNetwork(id string, options map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { +} + +func (d *driver) CreateNetwork(id string, options map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { create := &api.CreateNetworkRequest{ NetworkID: id, Options: options, diff --git a/vendor/src/github.com/docker/libnetwork/drivers/windows/windows.go b/vendor/src/github.com/docker/libnetwork/drivers/windows/windows.go index 95467a8615..aa4c7e5808 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/windows/windows.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/windows/windows.go @@ -149,8 +149,11 @@ func (c *networkConfiguration) processIPAM(id string, ipamV4Data, ipamV6Data []d return nil } +func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) { +} + // Create a new network -func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error { +func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo 
driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error { if _, err := d.getNetwork(id); err == nil { return types.ForbiddenErrorf("network %s exists", id) } @@ -414,6 +417,10 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo, } endpointStruct.Policies = append(endpointStruct.Policies, qosPolicies...) + if ifInfo.Address() != nil { + endpointStruct.IPAddress = ifInfo.Address().IP + } + configurationb, err := json.Marshal(endpointStruct) if err != nil { return err @@ -449,8 +456,13 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo, n.endpoints[eid] = endpoint n.Unlock() - ifInfo.SetIPAddress(endpoint.addr) - ifInfo.SetMacAddress(endpoint.macAddress) + if ifInfo.Address() == nil { + ifInfo.SetIPAddress(endpoint.addr) + } + + if macAddress == nil { + ifInfo.SetMacAddress(endpoint.macAddress) + } return nil } @@ -560,6 +572,14 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error { return nil } +func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) { + return nil, types.NotImplementedErrorf("not implemented") +} + +func (d *driver) NetworkFree(id string) error { + return types.NotImplementedErrorf("not implemented") +} + func (d *driver) Type() string { return d.name } diff --git a/vendor/src/github.com/docker/libnetwork/drivers_solaris.go b/vendor/src/github.com/docker/libnetwork/drivers_solaris.go new file mode 100644 index 0000000000..89ae42c58c --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/drivers_solaris.go @@ -0,0 +1,5 @@ +package libnetwork + +func getInitializers() []initializer { + return []initializer{} +} diff --git a/vendor/src/github.com/docker/libnetwork/drvregistry/drvregistry.go b/vendor/src/github.com/docker/libnetwork/drvregistry/drvregistry.go new file mode 100644 index 0000000000..6c1804dfa9 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/drvregistry/drvregistry.go @@ -0,0 +1,241 @@ +package drvregistry + +import ( + "fmt" + "strings" + "sync" + + "github.com/docker/libnetwork/driverapi" + "github.com/docker/libnetwork/ipamapi" + "github.com/docker/libnetwork/types" + + builtinIpam "github.com/docker/libnetwork/ipams/builtin" + nullIpam "github.com/docker/libnetwork/ipams/null" + remoteIpam "github.com/docker/libnetwork/ipams/remote" +) + +type driverData struct { + driver driverapi.Driver + capability driverapi.Capability +} + +type ipamData struct { + driver ipamapi.Ipam + capability *ipamapi.Capability + // default address spaces are provided by ipam driver at registration time + defaultLocalAddressSpace, defaultGlobalAddressSpace string +} + +type driverTable map[string]*driverData +type ipamTable map[string]*ipamData + +// DrvRegistry holds the registry of all network drivers and IPAM drivers that it knows about. +type DrvRegistry struct { + sync.Mutex + drivers driverTable + ipamDrivers ipamTable + dfn DriverNotifyFunc + ifn IPAMNotifyFunc +} + +// Functors definition + +// InitFunc defines the driver initialization function signature. +type InitFunc func(driverapi.DriverCallback, map[string]interface{}) error + +// IPAMWalkFunc defines the IPAM driver table walker function signature. +type IPAMWalkFunc func(name string, driver ipamapi.Ipam, cap *ipamapi.Capability) bool + +// DriverWalkFunc defines the network driver table walker function signature. 
+type DriverWalkFunc func(name string, driver driverapi.Driver, capability driverapi.Capability) bool + +// IPAMNotifyFunc defines the notify function signature when a new IPAM driver gets registered. +type IPAMNotifyFunc func(name string, driver ipamapi.Ipam, cap *ipamapi.Capability) error + +// DriverNotifyFunc defines the notify function signature when a new network driver gets registered. +type DriverNotifyFunc func(name string, driver driverapi.Driver, capability driverapi.Capability) error + +// New retruns a new driver registry handle. +func New(lDs, gDs interface{}, dfn DriverNotifyFunc, ifn IPAMNotifyFunc) (*DrvRegistry, error) { + r := &DrvRegistry{ + drivers: make(driverTable), + ipamDrivers: make(ipamTable), + dfn: dfn, + ifn: ifn, + } + + if err := r.initIPAMs(lDs, gDs); err != nil { + return nil, err + } + + return r, nil +} + +// AddDriver adds a network driver to the registry. +func (r *DrvRegistry) AddDriver(ntype string, fn InitFunc, config map[string]interface{}) error { + return fn(r, config) +} + +// WalkIPAMs walks the IPAM drivers registered in the registry and invokes the passed walk function and each one of them. +func (r *DrvRegistry) WalkIPAMs(ifn IPAMWalkFunc) { + type ipamVal struct { + name string + data *ipamData + } + + r.Lock() + ivl := make([]ipamVal, 0, len(r.ipamDrivers)) + for k, v := range r.ipamDrivers { + ivl = append(ivl, ipamVal{name: k, data: v}) + } + r.Unlock() + + for _, iv := range ivl { + if ifn(iv.name, iv.data.driver, iv.data.capability) { + break + } + } +} + +// WalkDrivers walks the network drivers registered in the registry and invokes the passed walk function and each one of them. +func (r *DrvRegistry) WalkDrivers(dfn DriverWalkFunc) { + type driverVal struct { + name string + data *driverData + } + + r.Lock() + dvl := make([]driverVal, 0, len(r.drivers)) + for k, v := range r.drivers { + dvl = append(dvl, driverVal{name: k, data: v}) + } + r.Unlock() + + for _, dv := range dvl { + if dfn(dv.name, dv.data.driver, dv.data.capability) { + break + } + } +} + +// Driver returns the actual network driver instance and its capability which registered with the passed name. +func (r *DrvRegistry) Driver(name string) (driverapi.Driver, *driverapi.Capability) { + r.Lock() + defer r.Unlock() + + d, ok := r.drivers[name] + if !ok { + return nil, nil + } + + return d.driver, &d.capability +} + +// IPAM returns the actual IPAM driver instance and its capability which registered with the passed name. +func (r *DrvRegistry) IPAM(name string) (ipamapi.Ipam, *ipamapi.Capability) { + r.Lock() + defer r.Unlock() + + i, ok := r.ipamDrivers[name] + if !ok { + return nil, nil + } + + return i.driver, i.capability +} + +// IPAMDefaultAddressSpaces returns the default address space strings for the passed IPAM driver name. +func (r *DrvRegistry) IPAMDefaultAddressSpaces(name string) (string, string, error) { + r.Lock() + defer r.Unlock() + + i, ok := r.ipamDrivers[name] + if !ok { + return "", "", fmt.Errorf("ipam %s not found", name) + } + + return i.defaultLocalAddressSpace, i.defaultGlobalAddressSpace, nil +} + +func (r *DrvRegistry) initIPAMs(lDs, gDs interface{}) error { + for _, fn := range [](func(ipamapi.Callback, interface{}, interface{}) error){ + builtinIpam.Init, + remoteIpam.Init, + nullIpam.Init, + } { + if err := fn(r, lDs, gDs); err != nil { + return err + } + } + + return nil +} + +// RegisterDriver registers the network driver when it gets discovered. 
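A hypothetical sketch of building the new registry outside the controller: data stores and notify callbacks may be nil (the controller above passes nil for the IPAM notify func), and AddDriver simply invokes the driver's init function with the registry as its callback. The helper and the choice of the null driver are illustrative:

import (
	"fmt"

	"github.com/docker/libnetwork/drivers/null"
	"github.com/docker/libnetwork/drvregistry"
)

// newRegistryWithNullDriver is a hypothetical helper: drvregistry.New already
// registers the built-in IPAM drivers, then one network driver is added.
func newRegistryWithNullDriver() (*drvregistry.DrvRegistry, error) {
	r, err := drvregistry.New(nil, nil, nil, nil) // no stores, no notify callbacks
	if err != nil {
		return nil, err
	}
	if err := r.AddDriver("null", null.Init, nil); err != nil {
		return nil, err
	}
	if d, _ := r.Driver("null"); d == nil {
		return nil, fmt.Errorf("null driver did not register itself")
	}
	return r, nil
}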
+func (r *DrvRegistry) RegisterDriver(ntype string, driver driverapi.Driver, capability driverapi.Capability) error { + if strings.TrimSpace(ntype) == "" { + return fmt.Errorf("network type string cannot be empty") + } + + r.Lock() + _, ok := r.drivers[ntype] + r.Unlock() + + if ok { + return driverapi.ErrActiveRegistration(ntype) + } + + if r.dfn != nil { + if err := r.dfn(ntype, driver, capability); err != nil { + return err + } + } + + dData := &driverData{driver, capability} + + r.Lock() + r.drivers[ntype] = dData + r.Unlock() + + return nil +} + +func (r *DrvRegistry) registerIpamDriver(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error { + if strings.TrimSpace(name) == "" { + return fmt.Errorf("ipam driver name string cannot be empty") + } + + r.Lock() + _, ok := r.ipamDrivers[name] + r.Unlock() + if ok { + return types.ForbiddenErrorf("ipam driver %q already registered", name) + } + + locAS, glbAS, err := driver.GetDefaultAddressSpaces() + if err != nil { + return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err) + } + + if r.ifn != nil { + if err := r.ifn(name, driver, caps); err != nil { + return err + } + } + + r.Lock() + r.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS, capability: caps} + r.Unlock() + + return nil +} + +// RegisterIpamDriver registers the IPAM driver discovered with default capabilities. +func (r *DrvRegistry) RegisterIpamDriver(name string, driver ipamapi.Ipam) error { + return r.registerIpamDriver(name, driver, &ipamapi.Capability{}) +} + +// RegisterIpamDriverWithCapabilities registers the IPAM driver discovered with specified capabilities. +func (r *DrvRegistry) RegisterIpamDriverWithCapabilities(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error { + return r.registerIpamDriver(name, driver, caps) +} diff --git a/vendor/src/github.com/docker/libnetwork/endpoint.go b/vendor/src/github.com/docker/libnetwork/endpoint.go index 7608dd7457..5335945690 100644 --- a/vendor/src/github.com/docker/libnetwork/endpoint.go +++ b/vendor/src/github.com/docker/libnetwork/endpoint.go @@ -67,6 +67,8 @@ type endpoint struct { ipamOptions map[string]string aliases map[string]string myAliases []string + svcID string + svcName string dbIndex uint64 dbExists bool sync.Mutex @@ -89,6 +91,9 @@ func (ep *endpoint) MarshalJSON() ([]byte, error) { epMap["anonymous"] = ep.anonymous epMap["disableResolution"] = ep.disableResolution epMap["myAliases"] = ep.myAliases + epMap["svcName"] = ep.svcName + epMap["svcID"] = ep.svcID + return json.Marshal(epMap) } @@ -172,6 +177,15 @@ func (ep *endpoint) UnmarshalJSON(b []byte) (err error) { if l, ok := epMap["locator"]; ok { ep.locator = l.(string) } + + if sn, ok := epMap["svcName"]; ok { + ep.svcName = sn.(string) + } + + if si, ok := epMap["svcID"]; ok { + ep.svcID = si.(string) + } + ma, _ := json.Marshal(epMap["myAliases"]) var myAliases []string json.Unmarshal(ma, &myAliases) @@ -196,6 +210,8 @@ func (ep *endpoint) CopyTo(o datastore.KVObject) error { dstEp.dbExists = ep.dbExists dstEp.anonymous = ep.anonymous dstEp.disableResolution = ep.disableResolution + dstEp.svcName = ep.svcName + dstEp.svcID = ep.svcID if ep.iface != nil { dstEp.iface = &endpointInterface{} @@ -413,7 +429,9 @@ func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) error { }() // Watch for service records - n.getController().watchSvcRecord(ep) + if !n.getController().cfg.Daemon.IsAgent { + 
n.getController().watchSvcRecord(ep) + } address := "" if ip := ep.getFirstInterfaceAddress(); ip != nil { @@ -446,6 +464,10 @@ func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) error { return err } + if e := ep.addToCluster(); e != nil { + log.Errorf("Could not update state for endpoint %s into cluster: %v", ep.Name(), e) + } + if sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil { return sb.setupDefaultGW() } @@ -632,6 +654,10 @@ func (ep *endpoint) sbLeave(sb *sandbox, force bool, options ...EndpointOption) return err } + if e := ep.deleteFromCluster(); e != nil { + log.Errorf("Could not delete state for endpoint %s from cluster: %v", ep.Name(), e) + } + sb.deleteHostsEntries(n.getSvcRecords(ep)) if !sb.inDelete && sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil { return sb.setupDefaultGW() @@ -730,7 +756,9 @@ func (ep *endpoint) Delete(force bool) error { }() // unwatch for service records - n.getController().unWatchSvcRecord(ep) + if !n.getController().cfg.Daemon.IsAgent { + n.getController().unWatchSvcRecord(ep) + } if err = ep.deleteEndpoint(force); err != nil && !force { return err @@ -863,6 +891,14 @@ func CreateOptionAlias(name string, alias string) EndpointOption { } } +// CreateOptionService function returns an option setter for setting service binding configuration +func CreateOptionService(name, id string) EndpointOption { + return func(ep *endpoint) { + ep.svcName = name + ep.svcID = id + } +} + //CreateOptionMyAlias function returns an option setter for setting endpoint's self alias func CreateOptionMyAlias(alias string) EndpointOption { return func(ep *endpoint) { @@ -981,7 +1017,7 @@ func (ep *endpoint) releaseAddress() { log.Debugf("Releasing addresses for endpoint %s's interface on network %s", ep.Name(), n.Name()) - ipam, err := n.getController().getIpamDriver(n.ipamType) + ipam, _, err := n.getController().getIPAMDriver(n.ipamType) if err != nil { log.Warnf("Failed to retrieve ipam driver to release interface address on delete of endpoint %s (%s): %v", ep.Name(), ep.ID(), err) return diff --git a/vendor/src/github.com/docker/libnetwork/endpoint_info.go b/vendor/src/github.com/docker/libnetwork/endpoint_info.go index 4ba8e3d548..cc7aa17a66 100644 --- a/vendor/src/github.com/docker/libnetwork/endpoint_info.go +++ b/vendor/src/github.com/docker/libnetwork/endpoint_info.go @@ -143,9 +143,16 @@ type endpointJoinInfo struct { gw net.IP gw6 net.IP StaticRoutes []*types.StaticRoute + driverTableEntries []*tableEntry disableGatewayService bool } +type tableEntry struct { + tableName string + key string + value []byte +} + func (ep *endpoint) Info() EndpointInfo { n, err := ep.getNetworkFromStore() if err != nil { @@ -292,6 +299,19 @@ func (ep *endpoint) AddStaticRoute(destination *net.IPNet, routeType int, nextHo return nil } +func (ep *endpoint) AddTableEntry(tableName, key string, value []byte) error { + ep.Lock() + defer ep.Unlock() + + ep.joinInfo.driverTableEntries = append(ep.joinInfo.driverTableEntries, &tableEntry{ + tableName: tableName, + key: key, + value: value, + }) + + return nil +} + func (ep *endpoint) Sandbox() Sandbox { cnt, ok := ep.getSandbox() if !ok { diff --git a/vendor/src/github.com/docker/libnetwork/ipam/allocator.go b/vendor/src/github.com/docker/libnetwork/ipam/allocator.go index 70fe06eba7..c059d447d7 100644 --- a/vendor/src/github.com/docker/libnetwork/ipam/allocator.go +++ b/vendor/src/github.com/docker/libnetwork/ipam/allocator.go @@ -3,6 +3,7 @@ package ipam import ( "fmt" "net" + "sort" "sync" log 
"github.com/Sirupsen/logrus" @@ -58,9 +59,6 @@ func NewAllocator(lcDs, glDs datastore.DataStore) (*Allocator, error) { {localAddressSpace, lcDs}, {globalAddressSpace, glDs}, } { - if aspc.ds == nil { - continue - } a.initializeAddressSpace(aspc.as, aspc.ds) } @@ -143,15 +141,22 @@ func (a *Allocator) checkConsistency(as string) { } func (a *Allocator) initializeAddressSpace(as string, ds datastore.DataStore) error { + scope := "" + if ds != nil { + scope = ds.Scope() + } + a.Lock() - if _, ok := a.addrSpaces[as]; ok { - a.Unlock() - return types.ForbiddenErrorf("tried to add an axisting address space: %s", as) + if currAS, ok := a.addrSpaces[as]; ok { + if currAS.ds != nil { + a.Unlock() + return types.ForbiddenErrorf("a datastore is already configured for the address space %s", as) + } } a.addrSpaces[as] = &addrSpace{ subnets: map[SubnetKey]*PoolData{}, id: dsConfigKey + "/" + as, - scope: ds.Scope(), + scope: scope, ds: ds, alloc: a, } @@ -313,10 +318,6 @@ func (a *Allocator) insertBitMask(key SubnetKey, pool *net.IPNet) error { //log.Debugf("Inserting bitmask (%s, %s)", key.String(), pool.String()) store := a.getStore(key.AddressSpace) - if store == nil { - return types.InternalErrorf("could not find store for address space %s while inserting bit mask", key.AddressSpace) - } - ipVer := getAddressVersion(pool.IP) ones, bits := pool.Mask.Size() numAddresses := uint64(1 << uint(bits-ones)) @@ -401,13 +402,6 @@ func (a *Allocator) getPredefinedPool(as string, ipV6 bool) (*net.IPNet, error) } if !aSpace.contains(as, nw) { - if as == localAddressSpace { - // Check if nw overlap with system routes, name servers - if _, err := ipamutils.FindAvailableNetwork([]*net.IPNet{nw}); err == nil { - return nw, nil - } - continue - } return nw, nil } } @@ -563,13 +557,18 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres func (a *Allocator) DumpDatabase() string { a.Lock() aspaces := make(map[string]*addrSpace, len(a.addrSpaces)) + orderedAS := make([]string, 0, len(a.addrSpaces)) for as, aSpace := range a.addrSpaces { + orderedAS = append(orderedAS, as) aspaces[as] = aSpace } a.Unlock() + sort.Strings(orderedAS) + var s string - for as, aSpace := range aspaces { + for _, as := range orderedAS { + aSpace := aspaces[as] s = fmt.Sprintf("\n\n%s Config", as) aSpace.Lock() for k, config := range aSpace.subnets { diff --git a/vendor/src/github.com/docker/libnetwork/ipam/store.go b/vendor/src/github.com/docker/libnetwork/ipam/store.go index ba44ef9dd9..02f627b66e 100644 --- a/vendor/src/github.com/docker/libnetwork/ipam/store.go +++ b/vendor/src/github.com/docker/libnetwork/ipam/store.go @@ -82,8 +82,10 @@ func (a *Allocator) getStore(as string) datastore.DataStore { func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) { store := a.getStore(as) + + // IPAM may not have a valid store. In such cases it is just in-memory state. if store == nil { - return nil, types.InternalErrorf("store for address space %s not found", as) + return nil, nil } pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a} @@ -100,8 +102,10 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) { func (a *Allocator) writeToStore(aSpace *addrSpace) error { store := aSpace.store() + + // IPAM may not have a valid store. In such cases it is just in-memory state. 
if store == nil { - return types.InternalErrorf("invalid store while trying to write %s address space", aSpace.DataScope()) + return nil } err := store.PutObjectAtomic(aSpace) @@ -114,8 +118,10 @@ func (a *Allocator) writeToStore(aSpace *addrSpace) error { func (a *Allocator) deleteFromStore(aSpace *addrSpace) error { store := aSpace.store() + + // IPAM may not have a valid store. In such cases it is just in-memory state. if store == nil { - return types.InternalErrorf("invalid store while trying to delete %s address space", aSpace.DataScope()) + return nil } return store.DeleteObjectAtomic(aSpace) diff --git a/vendor/src/github.com/docker/libnetwork/ipams/builtin/builtin_unix.go b/vendor/src/github.com/docker/libnetwork/ipams/builtin/builtin_unix.go index c47674aaf0..1c9e852602 100644 --- a/vendor/src/github.com/docker/libnetwork/ipams/builtin/builtin_unix.go +++ b/vendor/src/github.com/docker/libnetwork/ipams/builtin/builtin_unix.go @@ -1,4 +1,4 @@ -// +build linux freebsd +// +build linux freebsd solaris darwin package builtin diff --git a/vendor/src/github.com/docker/libnetwork/ipamutils/utils_linux.go b/vendor/src/github.com/docker/libnetwork/ipamutils/utils_linux.go deleted file mode 100644 index 056a234c8f..0000000000 --- a/vendor/src/github.com/docker/libnetwork/ipamutils/utils_linux.go +++ /dev/null @@ -1,76 +0,0 @@ -// Package ipamutils provides utililty functions for ipam management -package ipamutils - -import ( - "fmt" - "net" - - "github.com/docker/libnetwork/netutils" - "github.com/docker/libnetwork/osl" - "github.com/docker/libnetwork/resolvconf" - "github.com/vishvananda/netlink" -) - -// ElectInterfaceAddresses looks for an interface on the OS with the specified name -// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist, -// it chooses from a predifined list the first IPv4 address which does not conflict -// with other interfaces on the system. -func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) { - var ( - v4Net *net.IPNet - v6Nets []*net.IPNet - err error - ) - - InitNetworks() - - defer osl.InitOSContext()() - - link, _ := netlink.LinkByName(name) - if link != nil { - v4addr, err := netlink.AddrList(link, netlink.FAMILY_V4) - if err != nil { - return nil, nil, err - } - v6addr, err := netlink.AddrList(link, netlink.FAMILY_V6) - if err != nil { - return nil, nil, err - } - if len(v4addr) > 0 { - v4Net = v4addr[0].IPNet - } - for _, nlAddr := range v6addr { - v6Nets = append(v6Nets, nlAddr.IPNet) - } - } - - if link == nil || v4Net == nil { - // Choose from predifined broad networks - v4Net, err = FindAvailableNetwork(PredefinedBroadNetworks) - if err != nil { - return nil, nil, err - } - } - - return v4Net, v6Nets, nil -} - -// FindAvailableNetwork returns a network from the passed list which does not -// overlap with existing interfaces in the system -func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) { - // We don't check for an error here, because we don't really care if we - // can't read /etc/resolv.conf. So instead we skip the append if resolvConf - // is nil. It either doesn't exist, or we can't read it for some reason. 
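// Illustrative sketch (not part of this patch): the best-effort resolv.conf read
// described in the comment above. A read failure is deliberately ignored and the
// caller proceeds with an empty nameserver list. The simple line parser here is
// an assumption; libnetwork uses its resolvconf package for this.
package main

import (
	"fmt"
	"io/ioutil"
	"strings"
)

// nameservers returns the nameserver addresses found in path, or an empty
// slice if the file cannot be read or contains none.
func nameservers(path string) []string {
	var out []string
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return out // a missing or unreadable file is not an error for the caller
	}
	for _, line := range strings.Split(string(data), "\n") {
		fields := strings.Fields(line)
		if len(fields) >= 2 && fields[0] == "nameserver" {
			out = append(out, fields[1])
		}
	}
	return out
}

func main() {
	fmt.Println("nameservers:", nameservers("/etc/resolv.conf"))
}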
- var nameservers []string - if rc, err := resolvconf.Get(); err == nil { - nameservers = resolvconf.GetNameserversAsCIDR(rc.Content) - } - for _, nw := range list { - if err := netutils.CheckNameserverOverlaps(nameservers, nw); err == nil { - if err := netutils.CheckRouteOverlaps(nw); err == nil { - return nw, nil - } - } - } - return nil, fmt.Errorf("no available network") -} diff --git a/vendor/src/github.com/docker/libnetwork/netlabel/labels.go b/vendor/src/github.com/docker/libnetwork/netlabel/labels.go index d44015f159..7d5c35579b 100644 --- a/vendor/src/github.com/docker/libnetwork/netlabel/labels.go +++ b/vendor/src/github.com/docker/libnetwork/netlabel/labels.go @@ -39,6 +39,9 @@ const ( // OverlayNeighborIP constant represents overlay driver neighbor IP OverlayNeighborIP = DriverPrefix + ".overlay.neighbor_ip" + // OverlayVxlanIDList constant represents a list of VXLAN Ids as csv + OverlayVxlanIDList = DriverPrefix + ".overlay.vxlanid_list" + // Gateway represents the gateway for the network Gateway = Prefix + ".gateway" diff --git a/vendor/src/github.com/docker/libnetwork/netutils/utils.go b/vendor/src/github.com/docker/libnetwork/netutils/utils.go index 482e4f038f..62287efcc9 100644 --- a/vendor/src/github.com/docker/libnetwork/netutils/utils.go +++ b/vendor/src/github.com/docker/libnetwork/netutils/utils.go @@ -14,13 +14,6 @@ import ( "github.com/docker/libnetwork/types" ) -// constants for the IP address type -const ( - IP = iota // IPv4 and IPv6 - IPv4 - IPv6 -) - var ( // ErrNetworkOverlapsWithNameservers preformatted error ErrNetworkOverlapsWithNameservers = errors.New("requested network overlaps with nameserver") diff --git a/vendor/src/github.com/docker/libnetwork/ipamutils/utils_freebsd.go b/vendor/src/github.com/docker/libnetwork/netutils/utils_freebsd.go similarity index 89% rename from vendor/src/github.com/docker/libnetwork/ipamutils/utils_freebsd.go rename to vendor/src/github.com/docker/libnetwork/netutils/utils_freebsd.go index 09eced12d1..f7a7ac75f5 100644 --- a/vendor/src/github.com/docker/libnetwork/ipamutils/utils_freebsd.go +++ b/vendor/src/github.com/docker/libnetwork/netutils/utils_freebsd.go @@ -1,5 +1,4 @@ -// Package ipamutils provides utililty functions for ipam management -package ipamutils +package netutils import ( "net" diff --git a/vendor/src/github.com/docker/libnetwork/netutils/utils_linux.go b/vendor/src/github.com/docker/libnetwork/netutils/utils_linux.go index 782e542a52..f1e73d2297 100644 --- a/vendor/src/github.com/docker/libnetwork/netutils/utils_linux.go +++ b/vendor/src/github.com/docker/libnetwork/netutils/utils_linux.go @@ -4,9 +4,13 @@ package netutils import ( + "fmt" "net" "strings" + "github.com/docker/libnetwork/ipamutils" + "github.com/docker/libnetwork/osl" + "github.com/docker/libnetwork/resolvconf" "github.com/docker/libnetwork/types" "github.com/vishvananda/netlink" ) @@ -48,3 +52,66 @@ func GenerateIfaceName(prefix string, len int) (string, error) { } return "", types.InternalErrorf("could not generate interface name") } + +// ElectInterfaceAddresses looks for an interface on the OS with the +// specified name and returns its IPv4 and IPv6 addresses in CIDR +// form. If the interface does not exist, it chooses from a predifined +// list the first IPv4 address which does not conflict with other +// interfaces on the system. 
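// Illustrative sketch (not from the vendored sources): electing the first subnet
// from a predefined list that does not overlap networks already in use, which is
// the idea behind FindAvailableNetwork. Here the "in use" set is passed in
// explicitly instead of being discovered from routes and nameservers.
package main

import (
	"fmt"
	"net"
)

// overlaps reports whether two networks share any addresses.
func overlaps(a, b *net.IPNet) bool {
	return a.Contains(b.IP) || b.Contains(a.IP)
}

func firstAvailable(candidates, inUse []*net.IPNet) (*net.IPNet, error) {
	for _, c := range candidates {
		free := true
		for _, u := range inUse {
			if overlaps(c, u) {
				free = false
				break
			}
		}
		if free {
			return c, nil
		}
	}
	return nil, fmt.Errorf("no available network")
}

func mustCIDR(s string) *net.IPNet {
	_, n, err := net.ParseCIDR(s)
	if err != nil {
		panic(err)
	}
	return n
}

func main() {
	candidates := []*net.IPNet{mustCIDR("172.17.0.0/16"), mustCIDR("172.18.0.0/16")}
	inUse := []*net.IPNet{mustCIDR("172.17.42.0/24")}
	nw, err := firstAvailable(candidates, inUse)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("elected:", nw) // 172.18.0.0/16
}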
+func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) { + var ( + v4Net *net.IPNet + v6Nets []*net.IPNet + err error + ) + + defer osl.InitOSContext()() + + link, _ := netlink.LinkByName(name) + if link != nil { + v4addr, err := netlink.AddrList(link, netlink.FAMILY_V4) + if err != nil { + return nil, nil, err + } + v6addr, err := netlink.AddrList(link, netlink.FAMILY_V6) + if err != nil { + return nil, nil, err + } + if len(v4addr) > 0 { + v4Net = v4addr[0].IPNet + } + for _, nlAddr := range v6addr { + v6Nets = append(v6Nets, nlAddr.IPNet) + } + } + + if link == nil || v4Net == nil { + // Choose from predifined broad networks + v4Net, err = FindAvailableNetwork(ipamutils.PredefinedBroadNetworks) + if err != nil { + return nil, nil, err + } + } + + return v4Net, v6Nets, nil +} + +// FindAvailableNetwork returns a network from the passed list which does not +// overlap with existing interfaces in the system +func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) { + // We don't check for an error here, because we don't really care if we + // can't read /etc/resolv.conf. So instead we skip the append if resolvConf + // is nil. It either doesn't exist, or we can't read it for some reason. + var nameservers []string + if rc, err := resolvconf.Get(); err == nil { + nameservers = resolvconf.GetNameserversAsCIDR(rc.Content) + } + for _, nw := range list { + if err := CheckNameserverOverlaps(nameservers, nw); err == nil { + if err := CheckRouteOverlaps(nw); err == nil { + return nw, nil + } + } + } + return nil, fmt.Errorf("no available network") +} diff --git a/vendor/src/github.com/docker/libnetwork/netutils/utils_solaris.go b/vendor/src/github.com/docker/libnetwork/netutils/utils_solaris.go new file mode 100644 index 0000000000..d0356f6262 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/netutils/utils_solaris.go @@ -0,0 +1,32 @@ +package netutils + +// Solaris: TODO + +import ( + "net" + + "github.com/docker/libnetwork/ipamutils" +) + +// ElectInterfaceAddresses looks for an interface on the OS with the specified name +// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist, +// it chooses from a predifined list the first IPv4 address which does not conflict +// with other interfaces on the system. 
+func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) { + var ( + v4Net *net.IPNet + err error + ) + + v4Net, err = FindAvailableNetwork(ipamutils.PredefinedBroadNetworks) + if err != nil { + return nil, nil, err + } + return v4Net, nil, nil +} + +// FindAvailableNetwork returns a network from the passed list which does not +// overlap with existing interfaces in the system +func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) { + return list[0], nil +} diff --git a/vendor/src/github.com/docker/libnetwork/ipamutils/utils_windows.go b/vendor/src/github.com/docker/libnetwork/netutils/utils_windows.go similarity index 89% rename from vendor/src/github.com/docker/libnetwork/ipamutils/utils_windows.go rename to vendor/src/github.com/docker/libnetwork/netutils/utils_windows.go index 4878ca2b86..3b4bb9d909 100644 --- a/vendor/src/github.com/docker/libnetwork/ipamutils/utils_windows.go +++ b/vendor/src/github.com/docker/libnetwork/netutils/utils_windows.go @@ -1,5 +1,4 @@ -// Package ipamutils provides utililty functions for ipam management -package ipamutils +package netutils import ( "net" diff --git a/vendor/src/github.com/docker/libnetwork/network.go b/vendor/src/github.com/docker/libnetwork/network.go index a14550cd7e..5072e088c2 100644 --- a/vendor/src/github.com/docker/libnetwork/network.go +++ b/vendor/src/github.com/docker/libnetwork/network.go @@ -171,6 +171,7 @@ type network struct { drvOnce *sync.Once internal bool inDelete bool + driverTables []string sync.Mutex } @@ -620,49 +621,62 @@ func (n *network) processOptions(options ...NetworkOption) { } } -func (n *network) driverScope() string { +func (n *network) resolveDriver(name string, load bool) (driverapi.Driver, *driverapi.Capability, error) { c := n.getController() - c.Lock() // Check if a driver for the specified network type is available - dd, ok := c.drivers[n.networkType] - c.Unlock() + d, cap := c.drvRegistry.Driver(name) + if d == nil { + if load { + var err error + err = c.loadDriver(name) + if err != nil { + return nil, nil, err + } - if !ok { - var err error - dd, err = c.loadDriver(n.networkType) - if err != nil { - // If driver could not be resolved simply return an empty string - return "" + d, cap = c.drvRegistry.Driver(name) + if d == nil { + return nil, nil, fmt.Errorf("could not resolve driver %s in registry", name) + } + } else { + // don't fail if driver loading is not required + return nil, nil, nil } } - return dd.capability.DataScope + return d, cap, nil +} + +func (n *network) driverScope() string { + _, cap, err := n.resolveDriver(n.networkType, true) + if err != nil { + // If driver could not be resolved simply return an empty string + return "" + } + + return cap.DataScope } func (n *network) driver(load bool) (driverapi.Driver, error) { - c := n.getController() - - c.Lock() - // Check if a driver for the specified network type is available - dd, ok := c.drivers[n.networkType] - c.Unlock() - - if !ok && load { - var err error - dd, err = c.loadDriver(n.networkType) - if err != nil { - return nil, err - } - } else if !ok { - // don't fail if driver loading is not required - return nil, nil + d, cap, err := n.resolveDriver(n.networkType, load) + if err != nil { + return nil, err } + c := n.getController() n.Lock() - n.scope = dd.capability.DataScope + // If load is not required, driver, cap and err may all be nil + if cap != nil { + n.scope = cap.DataScope + } + if c.cfg.Daemon.IsAgent { + // If we are running in agent mode then all networks + // in libnetwork are local 
scope regardless of the + // backing driver. + n.scope = datastore.LocalScope + } n.Unlock() - return dd.driver, nil + return d, nil } func (n *network) Delete() error { @@ -717,6 +731,12 @@ func (n *network) delete(force bool) error { return fmt.Errorf("error deleting network from store: %v", err) } + n.cancelDriverWatches() + + if err = n.leaveCluster(); err != nil { + log.Errorf("Failed leaving network %s from the agent cluster: %v", n.Name(), err) + } + return nil } @@ -786,12 +806,12 @@ func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoi } } - ipam, err := n.getController().getIPAM(n.ipamType) + ipam, cap, err := n.getController().getIPAMDriver(n.ipamType) if err != nil { return nil, err } - if ipam.capability.RequiresMACAddress { + if cap.RequiresMACAddress { if ep.iface.mac == nil { ep.iface.mac = netutils.GenerateRandomMAC() } @@ -801,7 +821,7 @@ func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoi ep.ipamOptions[netlabel.MacAddress] = ep.iface.mac.String() } - if err = ep.assignAddress(ipam.driver, true, n.enableIPv6 && !n.postIPv6); err != nil { + if err = ep.assignAddress(ipam, true, n.enableIPv6 && !n.postIPv6); err != nil { return nil, err } defer func() { @@ -821,7 +841,7 @@ func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoi } }() - if err = ep.assignAddress(ipam.driver, false, n.enableIPv6 && n.postIPv6); err != nil { + if err = ep.assignAddress(ipam, false, n.enableIPv6 && n.postIPv6); err != nil { return nil, err } @@ -985,14 +1005,14 @@ func (n *network) addSvcRecords(name string, epIP net.IP, epIPv6 net.IP, ipMapUp c := n.getController() c.Lock() defer c.Unlock() - sr, ok := c.svcDb[n.ID()] + sr, ok := c.svcRecords[n.ID()] if !ok { sr = svcInfo{ svcMap: make(map[string][]net.IP), svcIPv6Map: make(map[string][]net.IP), ipMap: make(map[string]string), } - c.svcDb[n.ID()] = sr + c.svcRecords[n.ID()] = sr } if ipMapUpdate { @@ -1012,7 +1032,7 @@ func (n *network) deleteSvcRecords(name string, epIP net.IP, epIPv6 net.IP, ipMa c := n.getController() c.Lock() defer c.Unlock() - sr, ok := c.svcDb[n.ID()] + sr, ok := c.svcRecords[n.ID()] if !ok { return } @@ -1037,7 +1057,7 @@ func (n *network) getSvcRecords(ep *endpoint) []etchosts.Record { defer n.Unlock() var recs []etchosts.Record - sr, _ := n.ctrlr.svcDb[n.id] + sr, _ := n.ctrlr.svcRecords[n.id] for h, ip := range sr.svcMap { if ep != nil && strings.Split(h, ".")[0] == ep.Name() { @@ -1065,7 +1085,7 @@ func (n *network) ipamAllocate() error { return nil } - ipam, err := n.getController().getIpamDriver(n.ipamType) + ipam, _, err := n.getController().getIPAMDriver(n.ipamType) if err != nil { return err } @@ -1091,7 +1111,53 @@ func (n *network) ipamAllocate() error { return nil } - return n.ipamAllocateVersion(6, ipam) + err = n.ipamAllocateVersion(6, ipam) + if err != nil { + return err + } + + return nil +} + +func (n *network) requestPoolHelper(ipam ipamapi.Ipam, addressSpace, preferredPool, subPool string, options map[string]string, v6 bool) (string, *net.IPNet, map[string]string, error) { + for { + poolID, pool, meta, err := ipam.RequestPool(addressSpace, preferredPool, subPool, options, v6) + if err != nil { + return "", nil, nil, err + } + + // If the network belongs to global scope or the pool was + // explicitely chosen or it is invalid, do not perform the overlap check. 
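// Illustrative sketch (not part of this patch): the retry loop used by
// requestPoolHelper above. Overlapping pools are held (not released) until a
// non-overlapping pool is found, so the IPAM driver cannot hand back the same
// overlapping range again; the held pools are then released in one go. The pool
// source and overlap test below are stand-ins.
package main

import (
	"fmt"
	"net"
)

type poolSource interface {
	RequestPool() (*net.IPNet, error)
	ReleasePool(*net.IPNet) error
}

func requestNonOverlapping(src poolSource, overlaps func(*net.IPNet) bool) (*net.IPNet, error) {
	var held []*net.IPNet
	defer func() {
		// Give back every overlapping pool we held on to during the search.
		for _, p := range held {
			_ = src.ReleasePool(p)
		}
	}()

	for {
		pool, err := src.RequestPool()
		if err != nil {
			return nil, err
		}
		if !overlaps(pool) {
			return pool, nil
		}
		held = append(held, pool)
	}
}

type listSource struct{ pools []*net.IPNet }

func (l *listSource) RequestPool() (*net.IPNet, error) {
	if len(l.pools) == 0 {
		return nil, fmt.Errorf("no more predefined pools")
	}
	p := l.pools[0]
	l.pools = l.pools[1:]
	return p, nil
}

func (l *listSource) ReleasePool(*net.IPNet) error { return nil }

func main() {
	cidr := func(s string) *net.IPNet { _, n, _ := net.ParseCIDR(s); return n }
	src := &listSource{pools: []*net.IPNet{cidr("172.17.0.0/16"), cidr("172.18.0.0/16")}}
	inUse := cidr("172.17.0.0/16")
	pool, err := requestNonOverlapping(src, func(p *net.IPNet) bool {
		return p.Contains(inUse.IP) || inUse.Contains(p.IP)
	})
	fmt.Println(pool, err) // 172.18.0.0/16 <nil>
}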
+ if n.Scope() == datastore.GlobalScope || preferredPool != "" || !types.IsIPNetValid(pool) { + return poolID, pool, meta, nil + } + + // Check for overlap and if none found, we have found the right pool. + if _, err := netutils.FindAvailableNetwork([]*net.IPNet{pool}); err == nil { + return poolID, pool, meta, nil + } + + // Pool obtained in this iteration is + // overlapping. Hold onto the pool and don't release + // it yet, because we don't want ipam to give us back + // the same pool over again. But make sure we still do + // a deferred release when we have either obtained a + // non-overlapping pool or ran out of pre-defined + // pools. + defer func() { + if err := ipam.ReleasePool(poolID); err != nil { + log.Warnf("Failed to release overlapping pool %s while returning from pool request helper for network %s", pool, n.Name()) + } + }() + + // If this is a preferred pool request and the network + // is local scope and there is a overlap, we fail the + // network creation right here. The pool will be + // released in the defer. + if preferredPool != "" { + return "", nil, nil, fmt.Errorf("requested subnet %s overlaps in the host", preferredPool) + } + } } func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error { @@ -1130,7 +1196,7 @@ func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error { d := &IpamInfo{} (*infoList)[i] = d - d.PoolID, d.Pool, d.Meta, err = ipam.RequestPool(n.addrSpace, cfg.PreferredPool, cfg.SubPool, n.ipamOptions, ipVer == 6) + d.PoolID, d.Pool, d.Meta, err = n.requestPoolHelper(ipam, n.addrSpace, cfg.PreferredPool, cfg.SubPool, n.ipamOptions, ipVer == 6) if err != nil { return err } @@ -1189,7 +1255,7 @@ func (n *network) ipamRelease() { if n.Type() == "host" || n.Type() == "null" { return } - ipam, err := n.getController().getIpamDriver(n.ipamType) + ipam, _, err := n.getController().getIPAMDriver(n.ipamType) if err != nil { log.Warnf("Failed to retrieve ipam driver to release address pool(s) on delete of network %s (%s): %v", n.Name(), n.ID(), err) return @@ -1279,17 +1345,14 @@ func (n *network) getIPData(ipVer int) []driverapi.IPAMData { } func (n *network) deriveAddressSpace() (string, error) { - c := n.getController() - c.Lock() - ipd, ok := c.ipamDrivers[n.ipamType] - c.Unlock() - if !ok { - return "", types.NotFoundErrorf("could not find ipam driver %s to get default address space", n.ipamType) + local, global, err := n.getController().drvRegistry.IPAMDefaultAddressSpaces(n.ipamType) + if err != nil { + return "", types.NotFoundErrorf("failed to get default address space: %v", err) } if n.DataScope() == datastore.GlobalScope { - return ipd.defaultGlobalAddressSpace, nil + return global, nil } - return ipd.defaultLocalAddressSpace, nil + return local, nil } func (n *network) Info() NetworkInfo { @@ -1382,3 +1445,11 @@ func (n *network) Labels() map[string]string { return lbls } + +func (n *network) TableEventRegister(tableName string) error { + n.Lock() + defer n.Unlock() + + n.driverTables = append(n.driverTables, tableName) + return nil +} diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/broadcast.go b/vendor/src/github.com/docker/libnetwork/networkdb/broadcast.go new file mode 100644 index 0000000000..a1c3c61c84 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/networkdb/broadcast.go @@ -0,0 +1,127 @@ +package networkdb + +import ( + "github.com/hashicorp/memberlist" + "github.com/hashicorp/serf/serf" +) + +type networkEventType uint8 + +const ( + networkJoin networkEventType = 1 + iota + 
networkLeave +) + +type networkEventData struct { + Event networkEventType + LTime serf.LamportTime + NodeName string + NetworkID string +} + +type networkEventMessage struct { + id string + node string + msg []byte +} + +func (m *networkEventMessage) Invalidates(other memberlist.Broadcast) bool { + otherm := other.(*networkEventMessage) + return m.id == otherm.id && m.node == otherm.node +} + +func (m *networkEventMessage) Message() []byte { + return m.msg +} + +func (m *networkEventMessage) Finished() { +} + +func (nDB *NetworkDB) sendNetworkEvent(nid string, event networkEventType, ltime serf.LamportTime) error { + nEvent := networkEventData{ + Event: event, + LTime: ltime, + NodeName: nDB.config.NodeName, + NetworkID: nid, + } + + raw, err := encodeMessage(networkEventMsg, &nEvent) + if err != nil { + return err + } + + nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{ + msg: raw, + id: nid, + node: nDB.config.NodeName, + }) + return nil +} + +type tableEventType uint8 + +const ( + tableEntryCreate tableEventType = 1 + iota + tableEntryUpdate + tableEntryDelete +) + +type tableEventData struct { + Event tableEventType + LTime serf.LamportTime + NetworkID string + TableName string + NodeName string + Value []byte + Key string +} + +type tableEventMessage struct { + id string + tname string + key string + msg []byte + node string +} + +func (m *tableEventMessage) Invalidates(other memberlist.Broadcast) bool { + otherm := other.(*tableEventMessage) + return m.id == otherm.id && m.tname == otherm.tname && m.key == otherm.key +} + +func (m *tableEventMessage) Message() []byte { + return m.msg +} + +func (m *tableEventMessage) Finished() { +} + +func (nDB *NetworkDB) sendTableEvent(event tableEventType, nid string, tname string, key string, entry *entry) error { + tEvent := tableEventData{ + Event: event, + LTime: entry.ltime, + NodeName: nDB.config.NodeName, + NetworkID: nid, + TableName: tname, + Key: key, + Value: entry.value, + } + + raw, err := encodeMessage(tableEventMsg, &tEvent) + if err != nil { + return err + } + + nDB.RLock() + broadcastQ := nDB.networks[nDB.config.NodeName][nid].tableBroadcasts + nDB.RUnlock() + + broadcastQ.QueueBroadcast(&tableEventMessage{ + msg: raw, + id: nid, + tname: tname, + key: key, + node: nDB.config.NodeName, + }) + return nil +} diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/cluster.go b/vendor/src/github.com/docker/libnetwork/networkdb/cluster.go new file mode 100644 index 0000000000..317f1e5974 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/networkdb/cluster.go @@ -0,0 +1,446 @@ +package networkdb + +import ( + "crypto/rand" + "fmt" + "math/big" + rnd "math/rand" + "strings" + "time" + + "github.com/Sirupsen/logrus" + "github.com/hashicorp/memberlist" + "github.com/hashicorp/serf/serf" +) + +const reapInterval = 2 * time.Second + +type logWriter struct{} + +func (l *logWriter) Write(p []byte) (int, error) { + str := string(p) + + switch { + case strings.Contains(str, "[WARN]"): + logrus.Warn(str) + case strings.Contains(str, "[DEBUG]"): + logrus.Debug(str) + case strings.Contains(str, "[INFO]"): + logrus.Info(str) + case strings.Contains(str, "[ERR]"): + logrus.Warn(str) + } + + return len(p), nil +} + +func (nDB *NetworkDB) clusterInit() error { + config := memberlist.DefaultLANConfig() + config.Name = nDB.config.NodeName + config.BindAddr = nDB.config.BindAddr + + if nDB.config.BindPort != 0 { + config.BindPort = nDB.config.BindPort + } + + config.ProtocolVersion = memberlist.ProtocolVersionMax + 
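// Illustrative sketch (not part of this patch): how networkdb feeds its gossip
// into memberlist. A type implementing memberlist.Broadcast is queued on a
// TransmitLimitedQueue; memberlist later drains the queue via GetBroadcasts.
// Only the message payloads and keys here are made up.
package main

import (
	"fmt"

	"github.com/hashicorp/memberlist"
)

type simpleBroadcast struct {
	key string
	msg []byte
}

// Invalidates lets a newer broadcast for the same key replace an older queued one.
func (b *simpleBroadcast) Invalidates(other memberlist.Broadcast) bool {
	o, ok := other.(*simpleBroadcast)
	return ok && o.key == b.key
}

func (b *simpleBroadcast) Message() []byte { return b.msg }
func (b *simpleBroadcast) Finished()       {}

func main() {
	q := &memberlist.TransmitLimitedQueue{
		NumNodes:       func() int { return 3 }, // cluster size drives the retransmit count
		RetransmitMult: 4,
	}

	q.QueueBroadcast(&simpleBroadcast{key: "net1", msg: []byte("join net1")})
	q.QueueBroadcast(&simpleBroadcast{key: "net1", msg: []byte("leave net1")}) // invalidates the first

	for _, m := range q.GetBroadcasts(2, 1400) {
		fmt.Println(string(m))
	}
}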
config.Delegate = &delegate{nDB: nDB} + config.Events = &eventDelegate{nDB: nDB} + config.LogOutput = &logWriter{} + + nDB.networkBroadcasts = &memberlist.TransmitLimitedQueue{ + NumNodes: func() int { + return len(nDB.nodes) + }, + RetransmitMult: config.RetransmitMult, + } + + mlist, err := memberlist.Create(config) + if err != nil { + return fmt.Errorf("failed to create memberlist: %v", err) + } + + nDB.stopCh = make(chan struct{}) + nDB.memberlist = mlist + nDB.mConfig = config + + for _, trigger := range []struct { + interval time.Duration + fn func() + }{ + {reapInterval, nDB.reapState}, + {config.GossipInterval, nDB.gossip}, + {config.PushPullInterval, nDB.bulkSyncTables}, + } { + t := time.NewTicker(trigger.interval) + go nDB.triggerFunc(trigger.interval, t.C, nDB.stopCh, trigger.fn) + nDB.tickers = append(nDB.tickers, t) + } + + return nil +} + +func (nDB *NetworkDB) clusterJoin(members []string) error { + mlist := nDB.memberlist + + if _, err := mlist.Join(members); err != nil { + return fmt.Errorf("could not join node to memberlist: %v", err) + } + + return nil +} + +func (nDB *NetworkDB) clusterLeave() error { + mlist := nDB.memberlist + + if err := mlist.Leave(time.Second); err != nil { + return err + } + + close(nDB.stopCh) + + for _, t := range nDB.tickers { + t.Stop() + } + + return mlist.Shutdown() +} + +func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, stop <-chan struct{}, f func()) { + // Use a random stagger to avoid syncronizing + randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger)) + select { + case <-time.After(randStagger): + case <-stop: + return + } + for { + select { + case <-C: + f() + case <-stop: + return + } + } +} + +func (nDB *NetworkDB) reapState() { + nDB.reapNetworks() + nDB.reapTableEntries() +} + +func (nDB *NetworkDB) reapNetworks() { + now := time.Now() + nDB.Lock() + for name, nn := range nDB.networks { + for id, n := range nn { + if n.leaving && now.Sub(n.leaveTime) > reapInterval { + delete(nn, id) + nDB.deleteNetworkNode(id, name) + } + } + } + nDB.Unlock() +} + +func (nDB *NetworkDB) reapTableEntries() { + var paths []string + + now := time.Now() + + nDB.RLock() + nDB.indexes[byTable].Walk(func(path string, v interface{}) bool { + entry, ok := v.(*entry) + if !ok { + return false + } + + if !entry.deleting || now.Sub(entry.deleteTime) <= reapInterval { + return false + } + + paths = append(paths, path) + return false + }) + nDB.RUnlock() + + nDB.Lock() + for _, path := range paths { + params := strings.Split(path[1:], "/") + tname := params[0] + nid := params[1] + key := params[2] + + if _, ok := nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key)); !ok { + logrus.Errorf("Could not delete entry in table %s with network id %s and key %s as it does not exist", tname, nid, key) + } + + if _, ok := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key)); !ok { + logrus.Errorf("Could not delete entry in network %s with table name %s and key %s as it does not exist", nid, tname, key) + } + } + nDB.Unlock() +} + +func (nDB *NetworkDB) gossip() { + networkNodes := make(map[string][]string) + nDB.RLock() + for nid := range nDB.networks[nDB.config.NodeName] { + networkNodes[nid] = nDB.networkNodes[nid] + + } + nDB.RUnlock() + + for nid, nodes := range networkNodes { + mNodes := nDB.mRandomNodes(3, nodes) + bytesAvail := udpSendBuf - compoundHeaderOverhead + + nDB.RLock() + broadcastQ := nDB.networks[nDB.config.NodeName][nid].tableBroadcasts + nDB.RUnlock() + + if broadcastQ == 
nil { + logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid) + continue + } + + msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail) + if len(msgs) == 0 { + continue + } + + // Create a compound message + compound := makeCompoundMessage(msgs) + + for _, node := range mNodes { + nDB.RLock() + mnode := nDB.nodes[node] + nDB.RUnlock() + + if mnode == nil { + break + } + + // Send the compound message + if err := nDB.memberlist.SendToUDP(mnode, compound.Bytes()); err != nil { + logrus.Errorf("Failed to send gossip to %s: %s", mnode.Addr, err) + } + } + } +} + +type bulkSyncMessage struct { + LTime serf.LamportTime + Unsolicited bool + NodeName string + Networks []string + Payload []byte +} + +func (nDB *NetworkDB) bulkSyncTables() { + var networks []string + nDB.RLock() + for nid := range nDB.networks[nDB.config.NodeName] { + networks = append(networks, nid) + } + nDB.RUnlock() + + for { + if len(networks) == 0 { + break + } + + nid := networks[0] + networks = networks[1:] + + completed, err := nDB.bulkSync(nid, false) + if err != nil { + logrus.Errorf("periodic bulk sync failure for network %s: %v", nid, err) + continue + } + + // Remove all the networks for which we have + // successfully completed bulk sync in this iteration. + updatedNetworks := make([]string, 0, len(networks)) + for _, nid := range networks { + for _, completedNid := range completed { + if nid == completedNid { + continue + } + + updatedNetworks = append(updatedNetworks, nid) + } + } + + networks = updatedNetworks + } +} + +func (nDB *NetworkDB) bulkSync(nid string, all bool) ([]string, error) { + nDB.RLock() + nodes := nDB.networkNodes[nid] + nDB.RUnlock() + + if !all { + // If not all, then just pick one. + nodes = nDB.mRandomNodes(1, nodes) + } + + logrus.Debugf("%s: Initiating bulk sync with nodes %v", nDB.config.NodeName, nodes) + var err error + var networks []string + for _, node := range nodes { + if node == nDB.config.NodeName { + continue + } + + networks = nDB.findCommonNetworks(node) + err = nDB.bulkSyncNode(networks, node, true) + if err != nil { + err = fmt.Errorf("bulk sync failed on node %s: %v", node, err) + } + } + + if err != nil { + return nil, err + } + + return networks, nil +} + +// Bulk sync all the table entries belonging to a set of networks to a +// single peer node. 
It can be unsolicited or can be in response to an +// unsolicited bulk sync +func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited bool) error { + var msgs [][]byte + + logrus.Debugf("%s: Initiating bulk sync for networks %v with node %s", nDB.config.NodeName, networks, node) + + nDB.RLock() + mnode := nDB.nodes[node] + if mnode == nil { + nDB.RUnlock() + return nil + } + + for _, nid := range networks { + nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool { + entry, ok := v.(*entry) + if !ok { + return false + } + + params := strings.Split(path[1:], "/") + tEvent := tableEventData{ + Event: tableEntryCreate, + LTime: entry.ltime, + NodeName: entry.node, + NetworkID: nid, + TableName: params[1], + Key: params[2], + Value: entry.value, + } + + msg, err := encodeMessage(tableEventMsg, &tEvent) + if err != nil { + logrus.Errorf("Encode failure during bulk sync: %#v", tEvent) + return false + } + + msgs = append(msgs, msg) + return false + }) + } + nDB.RUnlock() + + // Create a compound message + compound := makeCompoundMessage(msgs) + + bsm := bulkSyncMessage{ + LTime: nDB.tableClock.Time(), + Unsolicited: unsolicited, + NodeName: nDB.config.NodeName, + Networks: networks, + Payload: compound.Bytes(), + } + + buf, err := encodeMessage(bulkSyncMsg, &bsm) + if err != nil { + return fmt.Errorf("failed to encode bulk sync message: %v", err) + } + + nDB.Lock() + ch := make(chan struct{}) + nDB.bulkSyncAckTbl[node] = ch + nDB.Unlock() + + err = nDB.memberlist.SendToTCP(mnode, buf) + if err != nil { + nDB.Lock() + delete(nDB.bulkSyncAckTbl, node) + nDB.Unlock() + + return fmt.Errorf("failed to send a TCP message during bulk sync: %v", err) + } + + startTime := time.Now() + select { + case <-time.After(30 * time.Second): + logrus.Errorf("Bulk sync to node %s timed out", node) + case <-ch: + nDB.Lock() + delete(nDB.bulkSyncAckTbl, node) + nDB.Unlock() + + logrus.Debugf("%s: Bulk sync to node %s took %s", nDB.config.NodeName, node, time.Now().Sub(startTime)) + } + + return nil +} + +// Returns a random offset between 0 and n +func randomOffset(n int) int { + if n == 0 { + return 0 + } + + val, err := rand.Int(rand.Reader, big.NewInt(int64(n))) + if err != nil { + logrus.Errorf("Failed to get a random offset: %v", err) + return 0 + } + + return int(val.Int64()) +} + +// mRandomNodes is used to select up to m random nodes. It is possible +// that less than m nodes are returned. 
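// Illustrative sketch (not from the vendored sources): the ack-or-timeout wait
// used by bulkSyncNode above. The sender registers a channel keyed by peer name,
// sends its message, and then waits for the receiver to close the channel or for
// a timeout to fire. The transport here is simulated with a goroutine.
package main

import (
	"fmt"
	"sync"
	"time"
)

type syncer struct {
	sync.Mutex
	acks map[string]chan struct{}
}

func (s *syncer) waitForAck(peer string, send func(), timeout time.Duration) error {
	ch := make(chan struct{})
	s.Lock()
	s.acks[peer] = ch
	s.Unlock()

	defer func() {
		s.Lock()
		delete(s.acks, peer)
		s.Unlock()
	}()

	send()

	select {
	case <-ch:
		return nil
	case <-time.After(timeout):
		return fmt.Errorf("bulk sync to %s timed out", peer)
	}
}

// ack is what the message handler would call when the peer's response arrives.
func (s *syncer) ack(peer string) {
	s.Lock()
	ch, ok := s.acks[peer]
	s.Unlock()
	if ok {
		close(ch)
	}
}

func main() {
	s := &syncer{acks: map[string]chan struct{}{}}
	err := s.waitForAck("node2", func() {
		go func() { // pretend the peer answers after a short delay
			time.Sleep(10 * time.Millisecond)
			s.ack("node2")
		}()
	}, time.Second)
	fmt.Println("sync result:", err)
}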
+func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string { + n := len(nodes) + mNodes := make([]string, 0, m) +OUTER: + // Probe up to 3*n times, with large n this is not necessary + // since k << n, but with small n we want search to be + // exhaustive + for i := 0; i < 3*n && len(mNodes) < m; i++ { + // Get random node + idx := randomOffset(n) + node := nodes[idx] + + if node == nDB.config.NodeName { + continue + } + + // Check if we have this node already + for j := 0; j < len(mNodes); j++ { + if node == mNodes[j] { + continue OUTER + } + } + + // Append the node + mNodes = append(mNodes, node) + } + + return mNodes +} diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/delegate.go b/vendor/src/github.com/docker/libnetwork/networkdb/delegate.go new file mode 100644 index 0000000000..f2c7b2ff76 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/networkdb/delegate.go @@ -0,0 +1,315 @@ +package networkdb + +import ( + "fmt" + "time" + + "github.com/Sirupsen/logrus" + "github.com/hashicorp/serf/serf" +) + +type networkData struct { + LTime serf.LamportTime + ID string + NodeName string + Leaving bool +} + +type networkPushPull struct { + LTime serf.LamportTime + Networks []networkData +} + +type delegate struct { + nDB *NetworkDB +} + +func (d *delegate) NodeMeta(limit int) []byte { + return []byte{} +} + +func (nDB *NetworkDB) handleNetworkEvent(nEvent *networkEventData) bool { + // Update our local clock if the received messages has newer + // time. + nDB.networkClock.Witness(nEvent.LTime) + + nDB.Lock() + defer nDB.Unlock() + + nodeNetworks, ok := nDB.networks[nEvent.NodeName] + if !ok { + // We haven't heard about this node at all. Ignore the leave + if nEvent.Event == networkLeave { + return false + } + + nodeNetworks = make(map[string]*network) + nDB.networks[nEvent.NodeName] = nodeNetworks + } + + if n, ok := nodeNetworks[nEvent.NetworkID]; ok { + // We have the latest state. Ignore the event + // since it is stale. + if n.ltime >= nEvent.LTime { + return false + } + + n.ltime = nEvent.LTime + n.leaving = nEvent.Event == networkLeave + if n.leaving { + n.leaveTime = time.Now() + } + + return true + } + + if nEvent.Event == networkLeave { + return false + } + + // This remote network join is being seen the first time. + nodeNetworks[nEvent.NetworkID] = &network{ + id: nEvent.NetworkID, + ltime: nEvent.LTime, + } + + nDB.networkNodes[nEvent.NetworkID] = append(nDB.networkNodes[nEvent.NetworkID], nEvent.NodeName) + return true +} + +func (nDB *NetworkDB) handleTableEvent(tEvent *tableEventData) bool { + // Update our local clock if the received messages has newer + // time. + nDB.tableClock.Witness(tEvent.LTime) + + if entry, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key); err == nil { + // We have the latest state. Ignore the event + // since it is stale. 
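// Illustrative sketch (not part of this patch): how the Lamport clock is used to
// drop stale gossip. The local clock witnesses every remote timestamp, and an
// incoming event is ignored when the stored entry's time is already >= the
// event's time. The record bookkeeping here is simplified.
package main

import (
	"fmt"

	"github.com/hashicorp/serf/serf"
)

type record struct {
	ltime serf.LamportTime
	value string
}

type db struct {
	clock   serf.LamportClock
	records map[string]*record
}

// apply returns true when the event was newer than the local state.
func (d *db) apply(key, value string, ltime serf.LamportTime) bool {
	d.clock.Witness(ltime) // keep the local clock ahead of everything we have seen

	if cur, ok := d.records[key]; ok && cur.ltime >= ltime {
		return false // stale event, ignore
	}
	d.records[key] = &record{ltime: ltime, value: value}
	return true
}

func main() {
	d := &db{records: map[string]*record{}}
	fmt.Println(d.apply("svc1", "10.0.0.2", 3)) // true, first sighting
	fmt.Println(d.apply("svc1", "10.0.0.1", 2)) // false, older than what we hold
	fmt.Println("local clock:", d.clock.Time())
}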
+ if entry.ltime >= tEvent.LTime { + return false + } + } + + entry := &entry{ + ltime: tEvent.LTime, + node: tEvent.NodeName, + value: tEvent.Value, + deleting: tEvent.Event == tableEntryDelete, + } + + if entry.deleting { + entry.deleteTime = time.Now() + } + + nDB.Lock() + nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tEvent.TableName, tEvent.NetworkID, tEvent.Key), entry) + nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", tEvent.NetworkID, tEvent.TableName, tEvent.Key), entry) + nDB.Unlock() + + var op opType + switch tEvent.Event { + case tableEntryCreate: + op = opCreate + case tableEntryUpdate: + op = opUpdate + case tableEntryDelete: + op = opDelete + } + + nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value)) + return true +} + +func (nDB *NetworkDB) handleCompound(buf []byte) { + // Decode the parts + trunc, parts, err := decodeCompoundMessage(buf[1:]) + if err != nil { + logrus.Errorf("Failed to decode compound request: %v", err) + return + } + + // Log any truncation + if trunc > 0 { + logrus.Warnf("Compound request had %d truncated messages", trunc) + } + + // Handle each message + for _, part := range parts { + nDB.handleMessage(part) + } +} + +func (nDB *NetworkDB) handleTableMessage(buf []byte) { + var tEvent tableEventData + if err := decodeMessage(buf[1:], &tEvent); err != nil { + logrus.Errorf("Error decoding table event message: %v", err) + return + } + + if rebroadcast := nDB.handleTableEvent(&tEvent); rebroadcast { + // Copy the buffer since we cannot rely on the slice not changing + newBuf := make([]byte, len(buf)) + copy(newBuf, buf) + + nDB.RLock() + n, ok := nDB.networks[nDB.config.NodeName][tEvent.NetworkID] + nDB.RUnlock() + + if !ok { + return + } + + broadcastQ := n.tableBroadcasts + broadcastQ.QueueBroadcast(&tableEventMessage{ + msg: newBuf, + id: tEvent.NetworkID, + tname: tEvent.TableName, + key: tEvent.Key, + node: nDB.config.NodeName, + }) + } +} + +func (nDB *NetworkDB) handleNetworkMessage(buf []byte) { + var nEvent networkEventData + if err := decodeMessage(buf[1:], &nEvent); err != nil { + logrus.Errorf("Error decoding network event message: %v", err) + return + } + + if rebroadcast := nDB.handleNetworkEvent(&nEvent); rebroadcast { + // Copy the buffer since it we cannot rely on the slice not changing + newBuf := make([]byte, len(buf)) + copy(newBuf, buf) + + nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{ + msg: newBuf, + id: nEvent.NetworkID, + node: nEvent.NodeName, + }) + } +} + +func (nDB *NetworkDB) handleBulkSync(buf []byte) { + var bsm bulkSyncMessage + if err := decodeMessage(buf[1:], &bsm); err != nil { + logrus.Errorf("Error decoding bulk sync message: %v", err) + return + } + + if bsm.LTime > 0 { + nDB.tableClock.Witness(bsm.LTime) + } + + nDB.handleMessage(bsm.Payload) + + // Don't respond to a bulk sync which was not unsolicited + if !bsm.Unsolicited { + nDB.RLock() + ch, ok := nDB.bulkSyncAckTbl[bsm.NodeName] + nDB.RUnlock() + if ok { + close(ch) + } + + return + } + + if err := nDB.bulkSyncNode(bsm.Networks, bsm.NodeName, false); err != nil { + logrus.Errorf("Error in responding to bulk sync from node %s: %v", nDB.nodes[bsm.NodeName].Addr, err) + } +} + +func (nDB *NetworkDB) handleMessage(buf []byte) { + msgType := messageType(buf[0]) + + switch msgType { + case networkEventMsg: + nDB.handleNetworkMessage(buf) + case tableEventMsg: + nDB.handleTableMessage(buf) + case compoundMsg: + nDB.handleCompound(buf) + case bulkSyncMsg: + nDB.handleBulkSync(buf) + default: + 
logrus.Errorf("%s: unknown message type %d payload = %v", nDB.config.NodeName, msgType, buf[:8]) + } +} + +func (d *delegate) NotifyMsg(buf []byte) { + if len(buf) == 0 { + return + } + + d.nDB.handleMessage(buf) +} + +func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte { + return d.nDB.networkBroadcasts.GetBroadcasts(overhead, limit) +} + +func (d *delegate) LocalState(join bool) []byte { + d.nDB.RLock() + defer d.nDB.RUnlock() + + pp := networkPushPull{ + LTime: d.nDB.networkClock.Time(), + } + + for name, nn := range d.nDB.networks { + for _, n := range nn { + pp.Networks = append(pp.Networks, networkData{ + LTime: n.ltime, + ID: n.id, + NodeName: name, + Leaving: n.leaving, + }) + } + } + + buf, err := encodeMessage(networkPushPullMsg, &pp) + if err != nil { + logrus.Errorf("Failed to encode local network state: %v", err) + return nil + } + + return buf +} + +func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) { + if len(buf) == 0 { + logrus.Error("zero byte remote network state received") + return + } + + if messageType(buf[0]) != networkPushPullMsg { + logrus.Errorf("Invalid message type %v received from remote", buf[0]) + } + + pp := networkPushPull{} + if err := decodeMessage(buf[1:], &pp); err != nil { + logrus.Errorf("Failed to decode remote network state: %v", err) + return + } + + if pp.LTime > 0 { + d.nDB.networkClock.Witness(pp.LTime) + } + + for _, n := range pp.Networks { + nEvent := &networkEventData{ + LTime: n.LTime, + NodeName: n.NodeName, + NetworkID: n.ID, + Event: networkJoin, + } + + if n.Leaving { + nEvent.Event = networkLeave + } + + d.nDB.handleNetworkEvent(nEvent) + } + +} diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/event_delegate.go b/vendor/src/github.com/docker/libnetwork/networkdb/event_delegate.go new file mode 100644 index 0000000000..4a924482e7 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/networkdb/event_delegate.go @@ -0,0 +1,23 @@ +package networkdb + +import "github.com/hashicorp/memberlist" + +type eventDelegate struct { + nDB *NetworkDB +} + +func (e *eventDelegate) NotifyJoin(n *memberlist.Node) { + e.nDB.Lock() + e.nDB.nodes[n.Name] = n + e.nDB.Unlock() +} + +func (e *eventDelegate) NotifyLeave(n *memberlist.Node) { + e.nDB.deleteNodeTableEntries(n.Name) + e.nDB.Lock() + delete(e.nDB.nodes, n.Name) + e.nDB.Unlock() +} + +func (e *eventDelegate) NotifyUpdate(n *memberlist.Node) { +} diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/message.go b/vendor/src/github.com/docker/libnetwork/networkdb/message.go new file mode 100644 index 0000000000..48f69da0e0 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/networkdb/message.go @@ -0,0 +1,122 @@ +package networkdb + +import ( + "bytes" + "encoding/binary" + "fmt" + + "github.com/hashicorp/go-msgpack/codec" +) + +type messageType uint8 + +const ( + // For network join/leave event message + networkEventMsg messageType = 1 + iota + + // For pushing/pulling network/node association state + networkPushPullMsg + + // For table entry CRUD event message + tableEventMsg + + // For building a compound message which packs many different + // message types together + compoundMsg + + // For syncing table entries in bulk b/w nodes. + bulkSyncMsg +) + +const ( + // Max udp message size chosen to avoid network packet + // fragmentation. 
+ udpSendBuf = 1400 + + // Compound message header overhead 1 byte(message type) + 4 + // bytes (num messages) + compoundHeaderOverhead = 5 + + // Overhead for each embedded message in a compound message 2 + // bytes (len of embedded message) + compoundOverhead = 2 +) + +func decodeMessage(buf []byte, out interface{}) error { + var handle codec.MsgpackHandle + return codec.NewDecoder(bytes.NewReader(buf), &handle).Decode(out) +} + +func encodeMessage(t messageType, msg interface{}) ([]byte, error) { + buf := bytes.NewBuffer(nil) + buf.WriteByte(uint8(t)) + + handle := codec.MsgpackHandle{} + encoder := codec.NewEncoder(buf, &handle) + err := encoder.Encode(msg) + return buf.Bytes(), err +} + +// makeCompoundMessage takes a list of messages and generates +// a single compound message containing all of them +func makeCompoundMessage(msgs [][]byte) *bytes.Buffer { + // Create a local buffer + buf := bytes.NewBuffer(nil) + + // Write out the type + buf.WriteByte(uint8(compoundMsg)) + + // Write out the number of message + binary.Write(buf, binary.BigEndian, uint32(len(msgs))) + + // Add the message lengths + for _, m := range msgs { + binary.Write(buf, binary.BigEndian, uint16(len(m))) + } + + // Append the messages + for _, m := range msgs { + buf.Write(m) + } + + return buf +} + +// decodeCompoundMessage splits a compound message and returns +// the slices of individual messages. Also returns the number +// of truncated messages and any potential error +func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) { + if len(buf) < 1 { + err = fmt.Errorf("missing compound length byte") + return + } + numParts := binary.BigEndian.Uint32(buf[0:4]) + buf = buf[4:] + + // Check we have enough bytes + if len(buf) < int(numParts*2) { + err = fmt.Errorf("truncated len slice") + return + } + + // Decode the lengths + lengths := make([]uint16, numParts) + for i := 0; i < int(numParts); i++ { + lengths[i] = binary.BigEndian.Uint16(buf[i*2 : i*2+2]) + } + buf = buf[numParts*2:] + + // Split each message + for idx, msgLen := range lengths { + if len(buf) < int(msgLen) { + trunc = int(numParts) - idx + return + } + + // Extract the slice, seek past on the buffer + slice := buf[:msgLen] + buf = buf[msgLen:] + parts = append(parts, slice) + } + return +} diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go b/vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go new file mode 100644 index 0000000000..1c49371896 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go @@ -0,0 +1,424 @@ +package networkdb + +import ( + "fmt" + "strings" + "sync" + "time" + + "github.com/Sirupsen/logrus" + "github.com/armon/go-radix" + "github.com/docker/go-events" + "github.com/hashicorp/memberlist" + "github.com/hashicorp/serf/serf" +) + +const ( + byTable int = 1 + iota + byNetwork +) + +// NetworkDB instance drives the networkdb cluster and acts the broker +// for cluster-scoped and network-scoped gossip and watches. +type NetworkDB struct { + sync.RWMutex + + // NetworkDB configuration. + config *Config + + // local copy of memberlist config that we use to driver + // network scoped gossip and bulk sync. + mConfig *memberlist.Config + + // All the tree index (byTable, byNetwork) that we maintain + // the db. + indexes map[int]*radix.Tree + + // Memberlist we use to drive the cluster. + memberlist *memberlist.Memberlist + + // List of all peer nodes in the cluster not-limited to any + // network. 
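// Illustrative sketch (not part of this patch): why NetworkDB keeps two radix
// trees. The same entry is inserted under "/table/network/key" and under
// "/network/table/key", so it can be enumerated either per table or per network
// with a cheap prefix walk. The values here are plain strings for brevity.
package main

import (
	"fmt"

	"github.com/armon/go-radix"
)

func main() {
	byTable := radix.New()
	byNetwork := radix.New()

	insert := func(table, network, key, value string) {
		byTable.Insert(fmt.Sprintf("/%s/%s/%s", table, network, key), value)
		byNetwork.Insert(fmt.Sprintf("/%s/%s/%s", network, table, key), value)
	}

	insert("endpoint_table", "net1", "ep1", "10.0.0.2")
	insert("endpoint_table", "net2", "ep2", "10.0.1.2")
	insert("service_table", "net1", "svc1", "10.0.0.10")

	// Everything known for network "net1", regardless of table.
	byNetwork.WalkPrefix("/net1", func(path string, v interface{}) bool {
		fmt.Println("net1:", path, "=", v)
		return false // keep walking
	})

	// Everything in the endpoint table, regardless of network.
	byTable.WalkPrefix("/endpoint_table", func(path string, v interface{}) bool {
		fmt.Println("endpoint_table:", path, "=", v)
		return false
	})
}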
+ nodes map[string]*memberlist.Node + + // A multi-dimensional map of network/node attachmemts. The + // first key is a node name and the second key is a network ID + // for the network that node is participating in. + networks map[string]map[string]*network + + // A map of nodes which are participating in a given + // network. The key is a network ID. + + networkNodes map[string][]string + + // A table of ack channels for every node from which we are + // waiting for an ack. + bulkSyncAckTbl map[string]chan struct{} + + // Global lamport clock for node network attach events. + networkClock serf.LamportClock + + // Global lamport clock for table events. + tableClock serf.LamportClock + + // Broadcast queue for network event gossip. + networkBroadcasts *memberlist.TransmitLimitedQueue + + // A central stop channel to stop all go routines running on + // behalf of the NetworkDB instance. + stopCh chan struct{} + + // A central broadcaster for all local watchers watching table + // events. + broadcaster *events.Broadcaster + + // List of all tickers which needed to be stopped when + // cleaning up. + tickers []*time.Ticker +} + +// network describes the node/network attachment. +type network struct { + // Network ID + id string + + // Lamport time for the latest state of the entry. + ltime serf.LamportTime + + // Node leave is in progress. + leaving bool + + // The time this node knew about the node's network leave. + leaveTime time.Time + + // The broadcast queue for table event gossip. This is only + // initialized for this node's network attachment entries. + tableBroadcasts *memberlist.TransmitLimitedQueue +} + +// Config represents the configuration of the networdb instance and +// can be passed by the caller. +type Config struct { + // NodeName is the cluster wide unique name for this node. + NodeName string + + // BindAddr is the local node's IP address that we bind to for + // cluster communication. + BindAddr string + + // BindPort is the local node's port to which we bind to for + // cluster communication. + BindPort int +} + +// entry defines a table entry +type entry struct { + // node from which this entry was learned. + node string + + // Lamport time for the most recent update to the entry + ltime serf.LamportTime + + // Opaque value store in the entry + value []byte + + // Deleting the entry is in progress. All entries linger in + // the cluster for certain amount of time after deletion. + deleting bool + + // The wall clock time when this node learned about this deletion. + deleteTime time.Time +} + +// New creates a new instance of NetworkDB using the Config passed by +// the caller. +func New(c *Config) (*NetworkDB, error) { + nDB := &NetworkDB{ + config: c, + indexes: make(map[int]*radix.Tree), + networks: make(map[string]map[string]*network), + nodes: make(map[string]*memberlist.Node), + networkNodes: make(map[string][]string), + bulkSyncAckTbl: make(map[string]chan struct{}), + broadcaster: events.NewBroadcaster(), + } + + nDB.indexes[byTable] = radix.New() + nDB.indexes[byNetwork] = radix.New() + + if err := nDB.clusterInit(); err != nil { + return nil, err + } + + return nDB, nil +} + +// Join joins this NetworkDB instance with a list of peer NetworkDB +// instances passed by the caller in the form of addr:port +func (nDB *NetworkDB) Join(members []string) error { + return nDB.clusterJoin(members) +} + +// Close destroys this NetworkDB instance by leave the cluster, +// stopping timers, canceling goroutines etc. 
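// Illustrative sketch (not part of this patch): bringing up a networkdb instance
// with the API added in this vendoring. The node name, bind address and the peer
// list are example values; in the daemon these come from agent configuration.
package main

import (
	"log"

	"github.com/docker/libnetwork/networkdb"
)

func main() {
	nDB, err := networkdb.New(&networkdb.Config{
		NodeName: "node1",
		BindAddr: "127.0.0.1",
		BindPort: 7946,
	})
	if err != nil {
		log.Fatalf("failed to create networkdb: %v", err)
	}
	defer nDB.Close()

	// Join the gossip cluster through any already-running peer (addr:port).
	if err := nDB.Join([]string{"127.0.0.1:7947"}); err != nil {
		log.Printf("could not join peer, continuing as a single-node cluster: %v", err)
	}

	// Participate in the network-scoped gossip for a given network ID.
	if err := nDB.JoinNetwork("net1"); err != nil {
		log.Fatalf("failed to join network: %v", err)
	}
}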
+func (nDB *NetworkDB) Close() { + if err := nDB.clusterLeave(); err != nil { + logrus.Errorf("Could not close DB %s: %v", nDB.config.NodeName, err) + } +} + +// GetEntry retrieves the value of a table entry in a given (network, +// table, key) tuple +func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) { + entry, err := nDB.getEntry(tname, nid, key) + if err != nil { + return nil, err + } + + return entry.value, nil +} + +func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) { + nDB.RLock() + defer nDB.RUnlock() + + e, ok := nDB.indexes[byTable].Get(fmt.Sprintf("/%s/%s/%s", tname, nid, key)) + if !ok { + return nil, fmt.Errorf("could not get entry in table %s with network id %s and key %s", tname, nid, key) + } + + return e.(*entry), nil +} + +// CreateEntry creates a table entry in NetworkDB for given (network, +// table, key) tuple and if the NetworkDB is part of the cluster +// propogates this event to the cluster. It is an error to create an +// entry for the same tuple for which there is already an existing +// entry. +func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error { + if _, err := nDB.GetEntry(tname, nid, key); err == nil { + return fmt.Errorf("cannot create entry as the entry in table %s with network id %s and key %s already exists", tname, nid, key) + } + + entry := &entry{ + ltime: nDB.tableClock.Increment(), + node: nDB.config.NodeName, + value: value, + } + + if err := nDB.sendTableEvent(tableEntryCreate, nid, tname, key, entry); err != nil { + return fmt.Errorf("cannot send table create event: %v", err) + } + + nDB.Lock() + nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) + nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) + nDB.Unlock() + + nDB.broadcaster.Write(makeEvent(opCreate, tname, nid, key, value)) + return nil +} + +// UpdateEntry updates a table entry in NetworkDB for given (network, +// table, key) tuple and if the NetworkDB is part of the cluster +// propogates this event to the cluster. It is an error to update a +// non-existent entry. +func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error { + if _, err := nDB.GetEntry(tname, nid, key); err != nil { + return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key) + } + + entry := &entry{ + ltime: nDB.tableClock.Increment(), + node: nDB.config.NodeName, + value: value, + } + + if err := nDB.sendTableEvent(tableEntryUpdate, nid, tname, key, entry); err != nil { + return fmt.Errorf("cannot send table update event: %v", err) + } + + nDB.Lock() + nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) + nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) + nDB.Unlock() + + nDB.broadcaster.Write(makeEvent(opUpdate, tname, nid, key, value)) + return nil +} + +// DeleteEntry deletes a table entry in NetworkDB for given (network, +// table, key) tuple and if the NetworkDB is part of the cluster +// propogates this event to the cluster. 
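// Illustrative sketch (not from the vendored sources): the table entry CRUD calls
// on a NetworkDB instance (see the bring-up sketch earlier). The table, network
// and key names are example values; a driver would use its own.
package main

import (
	"fmt"
	"log"

	"github.com/docker/libnetwork/networkdb"
)

func crud(nDB *networkdb.NetworkDB) {
	const table, network, key = "endpoint_table", "net1", "ep1"

	if err := nDB.CreateEntry(table, network, key, []byte("10.0.0.2")); err != nil {
		log.Fatalf("create: %v", err)
	}

	if err := nDB.UpdateEntry(table, network, key, []byte("10.0.0.3")); err != nil {
		log.Fatalf("update: %v", err)
	}

	value, err := nDB.GetEntry(table, network, key)
	if err != nil {
		log.Fatalf("get: %v", err)
	}
	fmt.Printf("%s/%s/%s = %s\n", table, network, key, value)

	// The entry lingers as a tombstone after deletion so the delete can
	// propagate through the cluster before it is reaped.
	if err := nDB.DeleteEntry(table, network, key); err != nil {
		log.Fatalf("delete: %v", err)
	}
}

func main() {
	nDB, err := networkdb.New(&networkdb.Config{NodeName: "node1", BindAddr: "127.0.0.1"})
	if err != nil {
		log.Fatal(err)
	}
	defer nDB.Close()

	if err := nDB.JoinNetwork("net1"); err != nil {
		log.Fatal(err)
	}
	crud(nDB)
}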
+func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error { + value, err := nDB.GetEntry(tname, nid, key) + if err != nil { + return fmt.Errorf("cannot delete entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key) + } + + entry := &entry{ + ltime: nDB.tableClock.Increment(), + node: nDB.config.NodeName, + value: value, + deleting: true, + deleteTime: time.Now(), + } + + if err := nDB.sendTableEvent(tableEntryDelete, nid, tname, key, entry); err != nil { + return fmt.Errorf("cannot send table delete event: %v", err) + } + + nDB.Lock() + nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) + nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) + nDB.Unlock() + + nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, value)) + return nil +} + +func (nDB *NetworkDB) deleteNodeTableEntries(node string) { + nDB.Lock() + nDB.indexes[byTable].Walk(func(path string, v interface{}) bool { + oldEntry := v.(*entry) + if oldEntry.node != node { + return false + } + + params := strings.Split(path[1:], "/") + tname := params[0] + nid := params[1] + key := params[2] + + entry := &entry{ + ltime: oldEntry.ltime, + node: node, + value: oldEntry.value, + deleting: true, + deleteTime: time.Now(), + } + + nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) + nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) + return false + }) + nDB.Unlock() +} + +// WalkTable walks a single table in NetworkDB and invokes the passed +// function for each entry in the table passing the network, key, +// value. The walk stops if the passed function returns a true. +func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte) bool) error { + nDB.RLock() + values := make(map[string]interface{}) + nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s", tname), func(path string, v interface{}) bool { + values[path] = v + return false + }) + nDB.RUnlock() + + for k, v := range values { + params := strings.Split(k[1:], "/") + nid := params[1] + key := params[2] + if fn(nid, key, v.(*entry).value) { + return nil + } + } + + return nil +} + +// JoinNetwork joins this node to a given network and propogates this +// event across the cluster. This triggers this node joining the +// sub-cluster of this network and participates in the network-scoped +// gossip and bulk sync for this network. +func (nDB *NetworkDB) JoinNetwork(nid string) error { + ltime := nDB.networkClock.Increment() + + nDB.Lock() + nodeNetworks, ok := nDB.networks[nDB.config.NodeName] + if !ok { + nodeNetworks = make(map[string]*network) + nDB.networks[nDB.config.NodeName] = nodeNetworks + } + nodeNetworks[nid] = &network{id: nid, ltime: ltime} + nodeNetworks[nid].tableBroadcasts = &memberlist.TransmitLimitedQueue{ + NumNodes: func() int { + return len(nDB.networkNodes[nid]) + }, + RetransmitMult: 4, + } + nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nDB.config.NodeName) + nDB.Unlock() + + if err := nDB.sendNetworkEvent(nid, networkJoin, ltime); err != nil { + return fmt.Errorf("failed to send leave network event for %s: %v", nid, err) + } + + logrus.Debugf("%s: joined network %s", nDB.config.NodeName, nid) + if _, err := nDB.bulkSync(nid, true); err != nil { + logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err) + } + + return nil +} + +// LeaveNetwork leaves this node from a given network and propogates +// this event across the cluster. 
This triggers this node leaving the +// sub-cluster of this network and as a result will no longer +// participate in the network-scoped gossip and bulk sync for this +// network. +func (nDB *NetworkDB) LeaveNetwork(nid string) error { + ltime := nDB.networkClock.Increment() + if err := nDB.sendNetworkEvent(nid, networkLeave, ltime); err != nil { + return fmt.Errorf("failed to send leave network event for %s: %v", nid, err) + } + + nDB.Lock() + defer nDB.Unlock() + nodeNetworks, ok := nDB.networks[nDB.config.NodeName] + if !ok { + return fmt.Errorf("could not find self node for network %s while trying to leave", nid) + } + + n, ok := nodeNetworks[nid] + if !ok { + return fmt.Errorf("could not find network %s while trying to leave", nid) + } + + n.ltime = ltime + n.leaving = true + return nil +} + +// Deletes the node from the list of nodes which participate in the +// passed network. Caller should hold the NetworkDB lock while calling +// this. +func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) { + nodes := nDB.networkNodes[nid] + for i, name := range nodes { + if name == nodeName { + nodes[i] = nodes[len(nodes)-1] + nodes = nodes[:len(nodes)-1] + break + } + } + nDB.networkNodes[nid] = nodes +} + +// findCommonNetworks finds the networks that both this node and the +// passed node have joined. +func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string { + nDB.RLock() + defer nDB.RUnlock() + + var networks []string + for nid := range nDB.networks[nDB.config.NodeName] { + if _, ok := nDB.networks[nodeName][nid]; ok { + networks = append(networks, nid) + } + } + + return networks +} diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/watch.go b/vendor/src/github.com/docker/libnetwork/networkdb/watch.go new file mode 100644 index 0000000000..2df00fa54f --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/networkdb/watch.go @@ -0,0 +1,98 @@ +package networkdb + +import "github.com/docker/go-events" +
+type opType uint8 + +const ( + opCreate opType = 1 + iota + opUpdate + opDelete +) + +type event struct { + Table string + NetworkID string + Key string + Value []byte +} + +// CreateEvent generates a table entry create event to the watchers +type CreateEvent event + +// UpdateEvent generates a table entry update event to the watchers +type UpdateEvent event + +// DeleteEvent generates a table entry delete event to the watchers +type DeleteEvent event + +// Watch creates a watcher with filters for a particular table or +// network or key or any combination of the tuple. If any of the +// filters is an empty string, it acts as a wildcard for that +// field. Watch returns a channel of events, where the events will be +// sent.
+func (nDB *NetworkDB) Watch(tname, nid, key string) (chan events.Event, func()) { + var matcher events.Matcher + + if tname != "" || nid != "" || key != "" { + matcher = events.MatcherFunc(func(ev events.Event) bool { + var evt event + switch ev := ev.(type) { + case CreateEvent: + evt = event(ev) + case UpdateEvent: + evt = event(ev) + case DeleteEvent: + evt = event(ev) + } + + if tname != "" && evt.Table != tname { + return false + } + + if nid != "" && evt.NetworkID != nid { + return false + } + + if key != "" && evt.Key != key { + return false + } + + return true + }) + } + + ch := events.NewChannel(0) + sink := events.Sink(events.NewQueue(ch)) + + if matcher != nil { + sink = events.NewFilter(sink, matcher) + } + + nDB.broadcaster.Add(sink) + return ch.C, func() { + nDB.broadcaster.Remove(sink) + ch.Close() + sink.Close() + } +} + +func makeEvent(op opType, tname, nid, key string, value []byte) events.Event { + ev := event{ + Table: tname, + NetworkID: nid, + Key: key, + Value: value, + } + + switch op { + case opCreate: + return CreateEvent(ev) + case opUpdate: + return UpdateEvent(ev) + case opDelete: + return DeleteEvent(ev) + } + + return nil +} diff --git a/vendor/src/github.com/docker/libnetwork/osl/interface_solaris.go b/vendor/src/github.com/docker/libnetwork/osl/interface_solaris.go new file mode 100644 index 0000000000..9c0141fd9b --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/osl/interface_solaris.go @@ -0,0 +1,4 @@ +package osl + +// IfaceOption is a function option type to set interface options +type IfaceOption func() diff --git a/vendor/src/github.com/docker/libnetwork/osl/neigh_solaris.go b/vendor/src/github.com/docker/libnetwork/osl/neigh_solaris.go new file mode 100644 index 0000000000..ffa8d75337 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/osl/neigh_solaris.go @@ -0,0 +1,4 @@ +package osl + +// NeighOption is a function option type to set interface options +type NeighOption func() diff --git a/vendor/src/github.com/docker/libnetwork/osl/route_linux.go b/vendor/src/github.com/docker/libnetwork/osl/route_linux.go index 3ebaaec77c..09a0a45f89 100644 --- a/vendor/src/github.com/docker/libnetwork/osl/route_linux.go +++ b/vendor/src/github.com/docker/libnetwork/osl/route_linux.go @@ -142,7 +142,7 @@ func (n *networkNamespace) SetGatewayIPv6(gwv6 net.IP) error { err := programGateway(n.nsPath(), gwv6, true) if err == nil { - n.SetGatewayIPv6(gwv6) + n.setGatewayIPv6(gwv6) } return err diff --git a/vendor/src/github.com/docker/libnetwork/resolvconf/resolvconf.go b/vendor/src/github.com/docker/libnetwork/resolvconf/resolvconf.go index 507d9ef50d..017b413dfc 100644 --- a/vendor/src/github.com/docker/libnetwork/resolvconf/resolvconf.go +++ b/vendor/src/github.com/docker/libnetwork/resolvconf/resolvconf.go @@ -10,8 +10,8 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/docker/pkg/ioutils" - "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/resolvconf/dns" + "github.com/docker/libnetwork/types" ) var ( @@ -122,7 +122,7 @@ func FilterResolvDNS(resolvConf []byte, ipv6Enabled bool) (*File, error) { } // if the resulting resolvConf has no more nameservers defined, add appropriate // default DNS servers for IPv4 and (optionally) IPv6 - if len(GetNameservers(cleanedResolvConf, netutils.IP)) == 0 { + if len(GetNameservers(cleanedResolvConf, types.IP)) == 0 { logrus.Infof("No non-localhost DNS nameservers are left in resolv.conf. 
Using default external servers : %v", defaultIPv4Dns) dns := defaultIPv4Dns if ipv6Enabled { @@ -158,11 +158,11 @@ func GetNameservers(resolvConf []byte, kind int) []string { nameservers := []string{} for _, line := range getLines(resolvConf, []byte("#")) { var ns [][]byte - if kind == netutils.IP { + if kind == types.IP { ns = nsRegexp.FindSubmatch(line) - } else if kind == netutils.IPv4 { + } else if kind == types.IPv4 { ns = nsIPv4Regexpmatch.FindSubmatch(line) - } else if kind == netutils.IPv6 { + } else if kind == types.IPv6 { ns = nsIPv6Regexpmatch.FindSubmatch(line) } if len(ns) > 0 { @@ -177,7 +177,7 @@ func GetNameservers(resolvConf []byte, kind int) []string { // This function's output is intended for net.ParseCIDR func GetNameserversAsCIDR(resolvConf []byte) []string { nameservers := []string{} - for _, nameserver := range GetNameservers(resolvConf, netutils.IP) { + for _, nameserver := range GetNameservers(resolvConf, types.IP) { nameservers = append(nameservers, nameserver+"/32") } return nameservers diff --git a/vendor/src/github.com/docker/libnetwork/resolver.go b/vendor/src/github.com/docker/libnetwork/resolver.go index cff692fd1f..08a81eebcc 100644 --- a/vendor/src/github.com/docker/libnetwork/resolver.go +++ b/vendor/src/github.com/docker/libnetwork/resolver.go @@ -9,8 +9,7 @@ import ( "time" log "github.com/Sirupsen/logrus" - "github.com/docker/libnetwork/iptables" - "github.com/docker/libnetwork/netutils" + "github.com/docker/libnetwork/types" "github.com/miekg/dns" ) @@ -47,7 +46,7 @@ const ( maxExtDNS = 3 //max number of external servers to try extIOTimeout = 4 * time.Second defaultRespSize = 512 - maxConcurrent = 50 + maxConcurrent = 100 logInterval = 2 * time.Second maxDNSID = 65536 ) @@ -105,8 +104,6 @@ func (r *resolver) SetupFunc() func() { r.err = fmt.Errorf("error in opening name server socket %v", err) return } - laddr := r.conn.LocalAddr() - _, ipPort, _ := net.SplitHostPort(laddr.String()) // Listen on a TCP as well tcpaddr := &net.TCPAddr{ @@ -118,21 +115,6 @@ func (r *resolver) SetupFunc() func() { r.err = fmt.Errorf("error in opening name TCP server socket %v", err) return } - ltcpaddr := r.tcpListen.Addr() - _, tcpPort, _ := net.SplitHostPort(ltcpaddr.String()) - rules := [][]string{ - {"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "udp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", laddr.String()}, - {"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "udp", "--sport", ipPort, "-j", "SNAT", "--to-source", ":" + dnsPort}, - {"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "tcp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", ltcpaddr.String()}, - {"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "tcp", "--sport", tcpPort, "-j", "SNAT", "--to-source", ":" + dnsPort}, - } - - for _, rule := range rules { - r.err = iptables.RawCombinedOutputNative(rule...) - if r.err != nil { - return - } - } r.err = nil }) } @@ -142,6 +124,11 @@ func (r *resolver) Start() error { if r.err != nil { return r.err } + + if err := r.setupIPTable(); err != nil { + return fmt.Errorf("setting up IP table rules failed: %v", err) + } + s := &dns.Server{Handler: r, PacketConn: r.conn} r.server = s go func() { @@ -240,7 +227,7 @@ func (r *resolver) handleIPQuery(name string, query *dns.Msg, ipType int) (*dns. 
if len(addr) > 1 { addr = shuffleAddr(addr) } - if ipType == netutils.IPv4 { + if ipType == types.IPv4 { for _, ip := range addr { rr := new(dns.A) rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: respTTL} @@ -305,6 +292,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { extConn net.Conn resp *dns.Msg err error + writer dns.ResponseWriter ) if query == nil || len(query.Question) == 0 { @@ -312,9 +300,9 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { } name := query.Question[0].Name if query.Question[0].Qtype == dns.TypeA { - resp, err = r.handleIPQuery(name, query, netutils.IPv4) + resp, err = r.handleIPQuery(name, query, types.IPv4) } else if query.Question[0].Qtype == dns.TypeAAAA { - resp, err = r.handleIPQuery(name, query, netutils.IPv6) + resp, err = r.handleIPQuery(name, query, types.IPv6) } else if query.Question[0].Qtype == dns.TypePTR { resp, err = r.handlePTRQuery(name, query) } @@ -342,7 +330,9 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { if resp.Len() > maxSize { truncateResp(resp, maxSize, proto == "tcp") } + writer = w } else { + queryID := query.Id for i := 0; i < maxExtDNS; i++ { extDNS := &r.extDNSList[i] if extDNS.ipStr == "" { @@ -388,11 +378,11 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { // forwardQueryStart stores required context to mux multiple client queries over // one connection; and limits the number of outstanding concurrent queries. - if r.forwardQueryStart(w, query) == false { + if r.forwardQueryStart(w, query, queryID) == false { old := r.tStamp r.tStamp = time.Now() if r.tStamp.Sub(old) > logInterval { - log.Errorf("More than %v concurrent queries from %s", maxConcurrent, w.LocalAddr().String()) + log.Errorf("More than %v concurrent queries from %s", maxConcurrent, extConn.LocalAddr().String()) } continue } @@ -418,32 +408,33 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { // Retrieves the context for the forwarded query and returns the client connection // to send the reply to - w = r.forwardQueryEnd(w, resp) - if w == nil { + writer = r.forwardQueryEnd(w, resp) + if writer == nil { continue } resp.Compress = true break } - - if resp == nil || w == nil { + if resp == nil || writer == nil { return } } - err = w.WriteMsg(resp) - if err != nil { + if writer == nil { + return + } + if err = writer.WriteMsg(resp); err != nil { log.Errorf("error writing resolver resp, %s", err) } } -func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg) bool { +func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg, queryID uint16) bool { proto := w.LocalAddr().Network() dnsID := uint16(rand.Intn(maxDNSID)) cc := clientConn{ - dnsID: msg.Id, + dnsID: queryID, respWriter: w, } @@ -462,7 +453,7 @@ func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg) bool { for ok := true; ok == true; dnsID = uint16(rand.Intn(maxDNSID)) { _, ok = r.client[dnsID] } - log.Debugf("client dns id %v, changed id %v", msg.Id, dnsID) + log.Debugf("client dns id %v, changed id %v", queryID, dnsID) r.client[dnsID] = cc msg.Id = dnsID default: @@ -497,6 +488,7 @@ func (r *resolver) forwardQueryEnd(w dns.ResponseWriter, msg *dns.Msg) dns.Respo log.Debugf("Can't retrieve client context for dns id %v", msg.Id) return nil } + log.Debugf("dns msg id %v, client id %v", msg.Id, cc.dnsID) delete(r.client, msg.Id) msg.Id = cc.dnsID w = cc.respWriter diff --git 
a/vendor/src/github.com/docker/libnetwork/resolver_unix.go b/vendor/src/github.com/docker/libnetwork/resolver_unix.go new file mode 100644 index 0000000000..2b3734fbac --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/resolver_unix.go @@ -0,0 +1,77 @@ +// +build !windows + +package libnetwork + +import ( + "fmt" + "net" + "os" + "os/exec" + "runtime" + + log "github.com/Sirupsen/logrus" + "github.com/docker/docker/pkg/reexec" + "github.com/docker/libnetwork/iptables" + "github.com/vishvananda/netns" +) + +func init() { + reexec.Register("setup-resolver", reexecSetupResolver) +} + +func reexecSetupResolver() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + if len(os.Args) < 4 { + log.Error("invalid number of arguments..") + os.Exit(1) + } + + _, ipPort, _ := net.SplitHostPort(os.Args[2]) + _, tcpPort, _ := net.SplitHostPort(os.Args[3]) + rules := [][]string{ + {"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "udp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", os.Args[2]}, + {"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "udp", "--sport", ipPort, "-j", "SNAT", "--to-source", ":" + dnsPort}, + {"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "tcp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", os.Args[3]}, + {"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "tcp", "--sport", tcpPort, "-j", "SNAT", "--to-source", ":" + dnsPort}, + } + + f, err := os.OpenFile(os.Args[1], os.O_RDONLY, 0) + if err != nil { + log.Errorf("failed get network namespace %q: %v", os.Args[1], err) + os.Exit(2) + } + defer f.Close() + + nsFD := f.Fd() + if err = netns.Set(netns.NsHandle(nsFD)); err != nil { + log.Errorf("setting into container net ns %v failed, %v", os.Args[1], err) + os.Exit(3) + } + + for _, rule := range rules { + if iptables.RawCombinedOutputNative(rule...) 
!= nil { + log.Errorf("setting up rule failed, %v", rule) + } + } +} + +func (r *resolver) setupIPTable() error { + if r.err != nil { + return r.err + } + laddr := r.conn.LocalAddr().String() + ltcpaddr := r.tcpListen.Addr().String() + + cmd := &exec.Cmd{ + Path: reexec.Self(), + Args: append([]string{"setup-resolver"}, r.sb.Key(), laddr, ltcpaddr), + Stdout: os.Stdout, + Stderr: os.Stderr, + } + if err := cmd.Run(); err != nil { + return fmt.Errorf("reexec failed: %v", err) + } + return nil +} diff --git a/vendor/src/github.com/docker/libnetwork/resolver_windows.go b/vendor/src/github.com/docker/libnetwork/resolver_windows.go new file mode 100644 index 0000000000..aa33b1a2ec --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/resolver_windows.go @@ -0,0 +1,7 @@ +// +build windows + +package libnetwork + +func (r *resolver) setupIPTable() error { + return nil +} diff --git a/vendor/src/github.com/docker/libnetwork/sandbox.go b/vendor/src/github.com/docker/libnetwork/sandbox.go index 5517301bcc..4cdb017fb3 100644 --- a/vendor/src/github.com/docker/libnetwork/sandbox.go +++ b/vendor/src/github.com/docker/libnetwork/sandbox.go @@ -12,7 +12,6 @@ import ( log "github.com/Sirupsen/logrus" "github.com/docker/libnetwork/etchosts" "github.com/docker/libnetwork/netlabel" - "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/osl" "github.com/docker/libnetwork/types" ) @@ -406,7 +405,7 @@ func (sb *sandbox) ResolveIP(ip string) string { for _, ep := range sb.getConnectedEndpoints() { n := ep.getNetwork() - sr, ok := n.getController().svcDb[n.ID()] + sr, ok := n.getController().svcRecords[n.ID()] if !ok { continue } @@ -436,6 +435,7 @@ func (sb *sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) { // {a.b in network c.d}, // {a in network b.c.d}, + log.Debugf("Name To resolve: %v", name) name = strings.TrimSuffix(name, ".") reqName := []string{name} networkName := []string{""} @@ -456,7 +456,6 @@ func (sb *sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) { epList := sb.getConnectedEndpoints() for i := 0; i < len(reqName); i++ { - log.Debugf("To resolve: %v in %v", reqName[i], networkName[i]) // First check for local container alias ip, ipv6Miss := sb.resolveName(reqName[i], networkName[i], epList, true, ipType) @@ -513,7 +512,7 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin ep.Unlock() } - sr, ok := n.getController().svcDb[n.ID()] + sr, ok := n.getController().svcRecords[n.ID()] if !ok { continue } @@ -522,7 +521,7 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin n.Lock() ip, ok = sr.svcMap[name] - if ipType == netutils.IPv6 { + if ipType == types.IPv6 { // If the name resolved to v4 address then its a valid name in // the docker network domain. 
If the network is not v6 enabled // set ipv6Miss to filter the DNS query from going to external @@ -972,6 +971,14 @@ func (eh epHeap) Less(i, j int) bool { return true } + if epi.getNetwork().Internal() { + return false + } + + if epj.getNetwork().Internal() { + return true + } + if ci != nil { cip, ok = ci.epPriority[eh[i].ID()] if !ok { diff --git a/vendor/src/github.com/docker/libnetwork/sandbox_dns_unix.go b/vendor/src/github.com/docker/libnetwork/sandbox_dns_unix.go index c8b595eb24..8d59e3d66a 100644 --- a/vendor/src/github.com/docker/libnetwork/sandbox_dns_unix.go +++ b/vendor/src/github.com/docker/libnetwork/sandbox_dns_unix.go @@ -11,7 +11,6 @@ import ( log "github.com/Sirupsen/logrus" "github.com/docker/libnetwork/etchosts" - "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/resolvconf" "github.com/docker/libnetwork/types" ) @@ -91,6 +90,10 @@ func (sb *sandbox) buildHostsFile() error { func (sb *sandbox) updateHostsFile(ifaceIP string) error { var mhost string + if ifaceIP == "" { + return nil + } + if sb.config.originHostsPath != "" { return nil } @@ -166,7 +169,7 @@ func (sb *sandbox) setupDNS() error { if len(sb.config.dnsList) > 0 || len(sb.config.dnsSearchList) > 0 || len(sb.config.dnsOptionsList) > 0 { var ( err error - dnsList = resolvconf.GetNameservers(currRC.Content, netutils.IP) + dnsList = resolvconf.GetNameservers(currRC.Content, types.IP) dnsSearchList = resolvconf.GetSearchDomains(currRC.Content) dnsOptionsList = resolvconf.GetOptions(currRC.Content) ) @@ -275,7 +278,7 @@ func (sb *sandbox) rebuildDNS() error { // localhost entries have already been filtered out from the list // retain only the v4 servers in sb for forwarding the DNS queries - sb.extDNS = resolvconf.GetNameservers(currRC.Content, netutils.IPv4) + sb.extDNS = resolvconf.GetNameservers(currRC.Content, types.IPv4) var ( dnsList = []string{sb.resolver.NameServer()} @@ -284,7 +287,7 @@ func (sb *sandbox) rebuildDNS() error { ) // external v6 DNS servers has to be listed in resolv.conf - dnsList = append(dnsList, resolvconf.GetNameservers(currRC.Content, netutils.IPv6)...) + dnsList = append(dnsList, resolvconf.GetNameservers(currRC.Content, types.IPv6)...) // Resolver returns the options in the format resolv.conf expects dnsOptionsList = append(dnsOptionsList, sb.resolver.ResolverOptions()...) 
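
For code outside the vendored tree, the netutils-to-types constant move shown in the resolver and sandbox DNS hunks above translates roughly to the sketch below. This is an illustrative example only, not part of the patch: the resolv.conf path and the surrounding main function are assumptions; the GetNameservers signature and the types.IP/types.IPv4 constants come from the diffs in this series.

    package main

    import (
    	"fmt"
    	"io/ioutil"

    	"github.com/docker/libnetwork/resolvconf"
    	"github.com/docker/libnetwork/types"
    )

    func main() {
    	// Read a resolv.conf; the path here is only for illustration.
    	rc, err := ioutil.ReadFile("/etc/resolv.conf")
    	if err != nil {
    		fmt.Println("could not read resolv.conf:", err)
    		return
    	}

    	// types.IP selects both address families, types.IPv4 narrows to IPv4
    	// servers, mirroring the calls in the sandbox and resolver code above.
    	all := resolvconf.GetNameservers(rc, types.IP)
    	v4 := resolvconf.GetNameservers(rc, types.IPv4)
    	fmt.Println("all nameservers:", all)
    	fmt.Println("IPv4 nameservers:", v4)
    }
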
diff --git a/vendor/src/github.com/docker/libnetwork/sandbox_externalkey_solaris.go b/vendor/src/github.com/docker/libnetwork/sandbox_externalkey_solaris.go new file mode 100644 index 0000000000..7569e46b93 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/sandbox_externalkey_solaris.go @@ -0,0 +1,45 @@ +// +build solaris + +package libnetwork + +import ( + "io" + "net" + + "github.com/docker/libnetwork/types" +) + +// processSetKeyReexec is a private function that must be called only on a reexec path +// It expects 3 args { [0] = "libnetwork-setkey", [1] = <container-id>, [2] = <controller-id> } +// It also expects libcontainer.State as a json string in <stdin> +// Refer to https://github.com/opencontainers/runc/pull/160/ for more information +func processSetKeyReexec() { +} + +// SetExternalKey provides a convenient way to set an External key to a sandbox +func SetExternalKey(controllerID string, containerID string, key string) error { + return types.NotImplementedErrorf("SetExternalKey isn't supported on non linux systems") +} + +func sendKey(c net.Conn, data setKeyData) error { + return types.NotImplementedErrorf("sendKey isn't supported on non linux systems") +} + +func processReturn(r io.Reader) error { + return types.NotImplementedErrorf("processReturn isn't supported on non linux systems") +} + +// no-op on non linux systems +func (c *controller) startExternalKeyListener() error { + return nil +} + +func (c *controller) acceptClientConnections(sock string, l net.Listener) { +} + +func (c *controller) processExternalKey(conn net.Conn) error { + return types.NotImplementedErrorf("processExternalKey isn't supported on non linux systems") +} + +func (c *controller) stopExternalKeyListener() { +} diff --git a/vendor/src/github.com/docker/libnetwork/sandbox_store.go b/vendor/src/github.com/docker/libnetwork/sandbox_store.go index 442aad1009..ae5ddc1566 --- a/vendor/src/github.com/docker/libnetwork/sandbox_store.go +++ b/vendor/src/github.com/docker/libnetwork/sandbox_store.go @@ -213,7 +213,7 @@ func (c *controller) sandboxCleanup() { var ep *endpoint if err != nil { logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err) - n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}} + n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}, persist: true} ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID} } else { ep, err = n.getEndpointFromStore(eps.Eid) diff --git a/vendor/src/github.com/docker/libnetwork/service.go b/vendor/src/github.com/docker/libnetwork/service.go new file mode 100644 index 0000000000..9caed0ae22 --- /dev/null +++ b/vendor/src/github.com/docker/libnetwork/service.go @@ -0,0 +1,80 @@ +package libnetwork + +import "net" + +type service struct { + name string + id string + backEnds map[string]map[string]net.IP +} + +func newService(name string, id string) *service { + return &service{ + name: name, + id: id, + backEnds: make(map[string]map[string]net.IP), + } +} + +func (c *controller) addServiceBinding(name, sid, nid, eid string, ip net.IP) error { + var s *service + + n, err := c.NetworkByID(nid) + if err != nil { + return err + } + + c.Lock() + s, ok := c.serviceBindings[sid] + if !ok { + s = newService(name, sid) + } + + netBackEnds, ok := s.backEnds[nid] + if !ok { + netBackEnds = make(map[string]net.IP) + s.backEnds[nid] = netBackEnds + } + + netBackEnds[eid] = ip + c.serviceBindings[sid] = s + c.Unlock() + + n.(*network).addSvcRecords(name, ip, nil, false) + return nil +} + +func (c *controller)
rmServiceBinding(name, sid, nid, eid string, ip net.IP) error { + n, err := c.NetworkByID(nid) + if err != nil { + return err + } + + c.Lock() + s, ok := c.serviceBindings[sid] + if !ok { + c.Unlock() + return nil + } + + netBackEnds, ok := s.backEnds[nid] + if !ok { + c.Unlock() + return nil + } + + delete(netBackEnds, eid) + + if len(netBackEnds) == 0 { + delete(s.backEnds, nid) + } + + if len(s.backEnds) == 0 { + delete(c.serviceBindings, sid) + } + c.Unlock() + + n.(*network).deleteSvcRecords(name, ip, nil, false) + + return err +} diff --git a/vendor/src/github.com/docker/libnetwork/store.go b/vendor/src/github.com/docker/libnetwork/store.go index 2c439dcbd4..714d56bd5a 100644 --- a/vendor/src/github.com/docker/libnetwork/store.go +++ b/vendor/src/github.com/docker/libnetwork/store.go @@ -4,9 +4,20 @@ import ( "fmt" log "github.com/Sirupsen/logrus" + "github.com/docker/libkv/store/boltdb" + "github.com/docker/libkv/store/consul" + "github.com/docker/libkv/store/etcd" + "github.com/docker/libkv/store/zookeeper" "github.com/docker/libnetwork/datastore" ) +func registerKVStores() { + consul.Register() + zookeeper.Register() + etcd.Register() + boltdb.Register() +} + func (c *controller) initScopedStore(scope string, scfg *datastore.ScopeCfg) error { store, err := datastore.NewDataStore(scope, scfg) if err != nil { @@ -20,6 +31,8 @@ func (c *controller) initScopedStore(scope string, scfg *datastore.ScopeCfg) err } func (c *controller) initStores() error { + registerKVStores() + c.Lock() if c.cfg == nil { c.Unlock() @@ -208,8 +221,7 @@ func (n *network) getEndpointsFromStore() ([]*endpoint, error) { func (c *controller) updateToStore(kvObject datastore.KVObject) error { cs := c.getStore(kvObject.DataScope()) if cs == nil { - log.Warnf("datastore for scope %s not initialized. kv object %s is not added to the store", kvObject.DataScope(), datastore.Key(kvObject.Key()...)) - return nil + return fmt.Errorf("datastore for scope %q is not initialized ", kvObject.DataScope()) } if err := cs.PutObjectAtomic(kvObject); err != nil { @@ -225,8 +237,7 @@ func (c *controller) updateToStore(kvObject datastore.KVObject) error { func (c *controller) deleteFromStore(kvObject datastore.KVObject) error { cs := c.getStore(kvObject.DataScope()) if cs == nil { - log.Debugf("datastore for scope %s not initialized. kv object %s is not deleted from datastore", kvObject.DataScope(), datastore.Key(kvObject.Key()...)) - return nil + return fmt.Errorf("datastore for scope %q is not initialized ", kvObject.DataScope()) } retry: @@ -407,7 +418,7 @@ func (c *controller) processEndpointDelete(nmap map[string]*netWatch, ep *endpoi // This is the last container going away for the network. Destroy // this network's svc db entry - delete(c.svcDb, ep.getNetwork().ID()) + delete(c.svcRecords, ep.getNetwork().ID()) delete(nmap, ep.getNetwork().ID()) } diff --git a/vendor/src/github.com/docker/libnetwork/types/types.go b/vendor/src/github.com/docker/libnetwork/types/types.go index 28d33cacf5..c5ab053338 100644 --- a/vendor/src/github.com/docker/libnetwork/types/types.go +++ b/vendor/src/github.com/docker/libnetwork/types/types.go @@ -9,6 +9,13 @@ import ( "strings" ) +// constants for the IP address type +const ( + IP = iota // IPv4 and IPv6 + IPv4 + IPv6 +) + // UUID represents a globally unique ID of various resources like network and endpoint type UUID string @@ -323,6 +330,12 @@ func GetMinimalIPNet(nw *net.IPNet) *net.IPNet { return nw } +// IsIPNetValid returns true if the ipnet is a valid network/mask +// combination. 
Otherwise returns false. +func IsIPNetValid(nw *net.IPNet) bool { + return nw.String() != "0.0.0.0/0" +} + var v4inV6MaskPrefix = []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} // compareIPMask checks if the passed ip and mask are semantically compatible. From 6eb2b903a39f66ce88155eb1c062c085e4959e39 Mon Sep 17 00:00:00 2001 From: Alessandro Boch Date: Sun, 8 May 2016 00:33:16 -0700 Subject: [PATCH 3/3] Docker changes for libnetwork vendoring b66c038 Signed-off-by: Alessandro Boch --- daemon/daemon_unix.go | 10 +++++----- daemon/daemon_windows.go | 6 +++--- daemon/network.go | 2 +- integration-cli/docker_cli_run_test.go | 12 ++++++------ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index b6d840d222..5e342feb4f 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -33,8 +33,8 @@ import ( "github.com/docker/libnetwork" nwconfig "github.com/docker/libnetwork/config" "github.com/docker/libnetwork/drivers/bridge" - "github.com/docker/libnetwork/ipamutils" "github.com/docker/libnetwork/netlabel" + "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/options" lntypes "github.com/docker/libnetwork/types" "github.com/opencontainers/runc/libcontainer/label" @@ -594,12 +594,12 @@ func (daemon *Daemon) initNetworkController(config *Config) (libnetwork.NetworkC } // Initialize default network on "null" - if _, err := controller.NewNetwork("null", "none", libnetwork.NetworkOptionPersist(false)); err != nil { + if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(false)); err != nil { return nil, fmt.Errorf("Error creating default \"null\" network: %v", err) } // Initialize default network on "host" - if _, err := controller.NewNetwork("host", "host", libnetwork.NetworkOptionPersist(false)); err != nil { + if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(false)); err != nil { return nil, fmt.Errorf("Error creating default \"host\" network: %v", err) } @@ -656,7 +656,7 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)} - nw, nw6List, err := ipamutils.ElectInterfaceAddresses(bridgeName) + nw, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName) if err == nil { ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String() hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask) @@ -734,7 +734,7 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e v6Conf = append(v6Conf, ipamV6Conf) } // Initialize default network on "bridge" with the same name - _, err = controller.NewNetwork("bridge", "bridge", + _, err = controller.NewNetwork("bridge", "bridge", "", libnetwork.NetworkOptionEnableIPv6(config.bridgeConfig.EnableIPv6), libnetwork.NetworkOptionDriverOpts(netOption), libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil), diff --git a/daemon/daemon_windows.go b/daemon/daemon_windows.go index 83775c3fd2..5fd5d96537 100644 --- a/daemon/daemon_windows.go +++ b/daemon/daemon_windows.go @@ -221,7 +221,7 @@ func (daemon *Daemon) initNetworkController(config *Config) (libnetwork.NetworkC } } - _, err = controller.NewNetwork("null", "none", libnetwork.NetworkOptionPersist(false)) + _, err = controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(false)) if err != nil { return nil, err } @@ -266,7 +266,7 @@ func (daemon *Daemon) initNetworkController(config 
*Config) (libnetwork.NetworkC } v6Conf := []*libnetwork.IpamConf{} - _, err := controller.NewNetwork(strings.ToLower(v.Type), name, + _, err := controller.NewNetwork(strings.ToLower(v.Type), name, "", libnetwork.NetworkOptionGeneric(options.Generic{ netlabel.GenericData: netOption, }), @@ -307,7 +307,7 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e v4Conf := []*libnetwork.IpamConf{&ipamV4Conf} v6Conf := []*libnetwork.IpamConf{} - _, err := controller.NewNetwork(string(runconfig.DefaultDaemonNetworkMode()), runconfig.DefaultDaemonNetworkMode().NetworkName(), + _, err := controller.NewNetwork(string(runconfig.DefaultDaemonNetworkMode()), runconfig.DefaultDaemonNetworkMode().NetworkName(), "", libnetwork.NetworkOptionGeneric(options.Generic{ netlabel.GenericData: netOption, }), diff --git a/daemon/network.go b/daemon/network.go index d8d54df2e9..91a9c0bea7 100644 --- a/daemon/network.go +++ b/daemon/network.go @@ -134,7 +134,7 @@ func (daemon *Daemon) CreateNetwork(create types.NetworkCreateRequest) (*types.N if create.Internal { nwOptions = append(nwOptions, libnetwork.NetworkOptionInternalNetwork()) } - n, err := c.NewNetwork(driver, create.Name, nwOptions...) + n, err := c.NewNetwork(driver, create.Name, "", nwOptions...) if err != nil { return nil, err } diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go index af6b241fa6..4d41d5cc9c 100644 --- a/integration-cli/docker_cli_run_test.go +++ b/integration-cli/docker_cli_run_test.go @@ -24,8 +24,8 @@ import ( "github.com/docker/docker/pkg/stringutils" "github.com/docker/docker/runconfig" "github.com/docker/go-connections/nat" - "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/resolvconf" + "github.com/docker/libnetwork/types" "github.com/go-check/check" libcontainerUser "github.com/opencontainers/runc/libcontainer/user" ) @@ -1308,13 +1308,13 @@ func (s *DockerSuite) TestRunDnsOptionsBasedOnHostResolvConf(c *check.C) { c.Fatalf("/etc/resolv.conf does not exist") } - hostNamservers := resolvconf.GetNameservers(origResolvConf, netutils.IP) + hostNamservers := resolvconf.GetNameservers(origResolvConf, types.IP) hostSearch := resolvconf.GetSearchDomains(origResolvConf) var out string out, _ = dockerCmd(c, "run", "--dns=127.0.0.1", "busybox", "cat", "/etc/resolv.conf") - if actualNameservers := resolvconf.GetNameservers([]byte(out), netutils.IP); string(actualNameservers[0]) != "127.0.0.1" { + if actualNameservers := resolvconf.GetNameservers([]byte(out), types.IP); string(actualNameservers[0]) != "127.0.0.1" { c.Fatalf("expected '127.0.0.1', but says: %q", string(actualNameservers[0])) } @@ -1330,7 +1330,7 @@ func (s *DockerSuite) TestRunDnsOptionsBasedOnHostResolvConf(c *check.C) { out, _ = dockerCmd(c, "run", "--dns-search=mydomain", "busybox", "cat", "/etc/resolv.conf") - actualNameservers := resolvconf.GetNameservers([]byte(out), netutils.IP) + actualNameservers := resolvconf.GetNameservers([]byte(out), types.IP) if len(actualNameservers) != len(hostNamservers) { c.Fatalf("expected %q nameserver(s), but it has: %q", len(hostNamservers), len(actualNameservers)) } @@ -1361,11 +1361,11 @@ func (s *DockerSuite) TestRunDnsOptionsBasedOnHostResolvConf(c *check.C) { c.Fatalf("/etc/resolv.conf does not exist") } - hostNamservers = resolvconf.GetNameservers(resolvConf, netutils.IP) + hostNamservers = resolvconf.GetNameservers(resolvConf, types.IP) hostSearch = resolvconf.GetSearchDomains(resolvConf) out, _ = dockerCmd(c, "run", "busybox", "cat", 
"/etc/resolv.conf") - if actualNameservers = resolvconf.GetNameservers([]byte(out), netutils.IP); string(actualNameservers[0]) != "12.34.56.78" || len(actualNameservers) != 1 { + if actualNameservers = resolvconf.GetNameservers([]byte(out), types.IP); string(actualNameservers[0]) != "12.34.56.78" || len(actualNameservers) != 1 { c.Fatalf("expected '12.34.56.78', but has: %v", actualNameservers) }