diff --git a/serving/samples/autoscale-go/README.md b/serving/samples/autoscale-go/README.md
index af37f8923..e27ea0197 100644
--- a/serving/samples/autoscale-go/README.md
+++ b/serving/samples/autoscale-go/README.md
@@ -1,10 +1,11 @@
 # Autoscale Sample
 
-A demonstration of the autoscaling capabilities of an Knative Serving Revision.
+A demonstration of the autoscaling capabilities of a Knative Serving Revision.
 
 ## Prerequisites
 
 1. A Kubernetes cluster with [Knative Serving](https://github.com/knative/docs/blob/master/install/README.md) installed.
+1. A [metrics installation](https://github.com/knative/docs/blob/master/serving/installing-logging-metrics-traces.md) for viewing scaling graphs (optional).
 1. Install [Docker](https://docs.docker.com/get-started/#prepare-your-docker-environment).
 1. Check out the code:
 ```
@@ -51,54 +52,106 @@ Build the application container and publish it to a container registry:
 1. Deploy the Knative Serving sample:
 ```
-   kubectl apply -f serving/samples/autoscale-go/sample.yaml
+   kubectl apply -f serving/samples/autoscale-go/service.yaml
 ```
 
-1. Find the ingress hostname and IP and export as an environment variable:
+1. Find the ingress gateway IP address and export it as an environment variable:
 ```
-   export SERVICE_HOST=`kubectl get route autoscale-route -o jsonpath="{.status.domain}"`
-   export SERVICE_IP=`kubectl get svc knative-ingressgateway -n istio-system -o jsonpath="{.status.loadBalancer.ingress[*].ip}"`
+   export IP_ADDRESS=`kubectl get svc knative-ingressgateway -n istio-system -o jsonpath="{.status.loadBalancer.ingress[*].ip}"`
 ```
 
 ## View the Autoscaling Capabilities
 
-1. Request the largest prime less than 40,000,000 from the autoscale app. Note that it consumes about 1 cpu/sec.
+1. Make a request to the autoscale app to see it consume some resources.
 ```
-   time curl --header "Host:$SERVICE_HOST" http://${SERVICE_IP?}/primes/40000000
+   curl --header "Host: autoscale-go.default.example.com" "http://${IP_ADDRESS?}?sleep=100&prime=1000000&bloat=50"
+   ```
+   ```
+   Allocated 50 Mb of memory.
+   The largest prime less than 1000000 is 999983.
+   Slept for 100.13 milliseconds.
 ```
 
-1. Ramp up traffic on the autoscale app (about 300 QPS):
+1. Ramp up traffic to maintain 10 in-flight requests.
+
 ```
-   kubectl delete namespace hey --ignore-not-found && kubectl create namespace hey
+   go run serving/samples/autoscale-go/test/test.go -sleep 100 -prime 1000000 -bloat 50 -qps 9999 -concurrency 10
 ```
 ```
-   for i in `seq 2 2 60`; do
-     kubectl -n hey run hey-$i --image josephburnett/hey --restart Never -- \
-       -n 999999 -c $i -z 2m -host $SERVICE_HOST \
-       "http://${SERVICE_IP?}/primes/40000000"
-     sleep 1
-   done
+   REQUEST STATS:
+   Total: 34 Inflight: 10 Done: 34 Success Rate: 100.00% Avg Latency: 0.2584 sec
+   Total: 69 Inflight: 10 Done: 35 Success Rate: 100.00% Avg Latency: 0.2750 sec
+   Total: 108 Inflight: 10 Done: 39 Success Rate: 100.00% Avg Latency: 0.2598 sec
+   Total: 148 Inflight: 10 Done: 40 Success Rate: 100.00% Avg Latency: 0.2565 sec
+   Total: 185 Inflight: 10 Done: 37 Success Rate: 100.00% Avg Latency: 0.2624 sec
+   ...
 ```
+   > Note: Use CTRL+C to exit the load test.
 
 1. Watch the Knative Serving deployment pod count increase.
 ```
    kubectl get deploy --watch
 ```
    > Note: Use CTRL+C to exit watch mode.
-
-1. Watch the pod traffic ramp up.
+
+## Analysis
+
+### Algorithm
+
+Knative Serving autoscaling is based on the average number of in-flight requests per pod (concurrency). The system has a default [target concurrency of 1.0](https://github.com/knative/serving/blob/5441a18b360805d261528b2ac8ac13124e826946/config/config-autoscaler.yaml#L27).
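+
+In rough pseudocode, the rule looks like the sketch below. This is illustrative only, not the autoscaler's actual code (the real implementation also averages metrics over a sliding window and handles sudden traffic spikes); `desiredPods` is a hypothetical name.
+
+```
+// Illustrative sketch: observed concurrency across the Revision,
+// divided by the per-pod target, rounded up.
+func desiredPods(observedConcurrency, targetConcurrency float64) int {
+	return int(math.Ceil(observedConcurrency / targetConcurrency))
+}
+```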
+
+For example, if a Revision is receiving 35 requests per second, each of which takes about 0.25 seconds, Knative Serving will determine that the Revision needs about 9 pods:
+
+```
+35 * .25 = 8.75
+ceil(8.75) = 9
+```
+
+### Dashboards
+
+View the Knative Serving Scaling and Request dashboards (if configured).
+
+```
+kubectl port-forward -n monitoring $(kubectl get pods -n monitoring --selector=app=grafana --output=jsonpath="{.items..metadata.name}") 3000
+```
+
+![scale dashboard](scale-dashboard.png)
+
+![request dashboard](request-dashboard.png)
+
+### Other Experiments
+
+1. Maintain 100 concurrent requests.
 ```
-   kubectl get pods -n hey --show-all --watch
+   go run serving/samples/autoscale-go/test/test.go -qps 9999 -concurrency 100
 ```
-1. Look at the latency, requests/sec and success rate of each pod.
+1. Maintain 100 qps with fast requests.
 ```
-   for i in `seq 2 2 60`; do kubectl -n hey logs hey-$i ; done
+   go run serving/samples/autoscale-go/test/test.go -qps 100 -concurrency 9999
+   ```
+
+1. Maintain 100 qps with slow requests.
+   ```
+   go run serving/samples/autoscale-go/test/test.go -qps 100 -concurrency 9999 -sleep 500
+   ```
+
+1. Heavy CPU usage.
+   ```
+   go run serving/samples/autoscale-go/test/test.go -qps 9999 -concurrency 10 -prime 40000000
+   ```
+
+1. Heavy memory usage.
+   ```
+   go run serving/samples/autoscale-go/test/test.go -qps 9999 -concurrency 5 -bloat 1000
 ```
 
 ## Cleanup
 ```
-kubectl delete namespace hey
-kubectl delete -f serving/samples/autoscale-go/sample.yaml
+kubectl delete -f serving/samples/autoscale-go/service.yaml
 ```
+
+## Further reading
+
+1. [Autoscaling Developer Documentation](https://github.com/knative/serving/blob/master/docs/scaling/DEVELOPMENT.md)
diff --git a/serving/samples/autoscale-go/autoscale.go b/serving/samples/autoscale-go/autoscale.go
index b2934cb41..d241ce2e1 100644
--- a/serving/samples/autoscale-go/autoscale.go
+++ b/serving/samples/autoscale-go/autoscale.go
@@ -16,16 +16,18 @@ limitations under the License.
 package main
 
 import (
-	"encoding/json"
+	"fmt"
 	"math"
 	"net/http"
 	"strconv"
+	"sync"
+	"time"
 )
 
 // Algorithm from https://stackoverflow.com/a/21854246
 // Only primes less than or equal to N will be generated
-func primes(N int) []int {
+func allPrimes(N int) []int {
 	var x, y, n int
 	nsqrt := math.Sqrt(float64(N))
@@ -71,22 +73,87 @@
 	return primes
 }
 
-const primesPath = "/primes/"
+func bloat(mb int) string {
+	b := make([]byte, mb*1024*1024)
+	// Touch the first and last byte so the allocation is actually
+	// backed by physical memory.
+	b[0] = 1
+	b[len(b)-1] = 1
+	return fmt.Sprintf("Allocated %v Mb of memory.\n", mb)
+}
+
+func prime(max int) string {
+	p := allPrimes(max)
+	if len(p) > 0 {
+		return fmt.Sprintf("The largest prime less than %v is %v.\n", max, p[len(p)-1])
+	} else {
+		return fmt.Sprintf("There are no primes smaller than %v.\n", max)
+	}
+}
+
+func sleep(ms int) string {
+	start := time.Now().UnixNano()
+	time.Sleep(time.Duration(ms) * time.Millisecond)
+	end := time.Now().UnixNano()
+	return fmt.Sprintf("Slept for %.2f milliseconds.\n", float64(end-start)/1000000)
+}
+
+func parseIntParam(r *http.Request, param string) (int, bool, error) {
+	if value := r.URL.Query().Get(param); value != "" {
+		i, err := strconv.Atoi(value)
+		if err != nil {
+			return 0, false, err
+		}
+		if i == 0 {
+			return i, false, nil
+		}
+		return i, true, nil
+	}
+	return 0, false, nil
+}
 
 func handler(w http.ResponseWriter, r *http.Request) {
-	w.Header().Set("Content-Type", "application/json")
-	param := r.URL.Path[len(primesPath):]
-	n, err := strconv.Atoi(param)
+	// Validate inputs.
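+	// Each query parameter is optional: parseIntParam reports a
+	// missing or zero value as "not set", so only the requested
+	// kinds of work run below.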
+	ms, hasMs, err := parseIntParam(r, "sleep")
 	if err != nil {
-		w.WriteHeader(http.StatusBadRequest)
-	} else {
-		w.WriteHeader(http.StatusOK)
-		p := primes(n)
-		json.NewEncoder(w).Encode(p[len(p)-1:])
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	max, hasMax, err := parseIntParam(r, "prime")
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	mb, hasMb, err := parseIntParam(r, "bloat")
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	// Consume time, cpu and memory in parallel. http.ResponseWriter
+	// is not safe for concurrent use, so do the work concurrently
+	// but serialize the writes with a mutex.
+	var wg sync.WaitGroup
+	defer wg.Wait()
+	var mu sync.Mutex
+	if hasMs {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			msg := sleep(ms)
+			mu.Lock()
+			defer mu.Unlock()
+			fmt.Fprint(w, msg)
+		}()
+	}
+	if hasMax {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			msg := prime(max)
+			mu.Lock()
+			defer mu.Unlock()
+			fmt.Fprint(w, msg)
+		}()
+	}
+	if hasMb {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			msg := bloat(mb)
+			mu.Lock()
+			defer mu.Unlock()
+			fmt.Fprint(w, msg)
+		}()
 	}
 }
 
 func main() {
-	http.HandleFunc(primesPath, handler)
+	http.HandleFunc("/", handler)
 	http.ListenAndServe(":8080", nil)
 }
diff --git a/serving/samples/autoscale-go/hey/Dockerfile b/serving/samples/autoscale-go/hey/Dockerfile
deleted file mode 100644
index e908d16d0..000000000
--- a/serving/samples/autoscale-go/hey/Dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-FROM golang
-
-RUN go get -u github.com/rakyll/hey
-
-ENTRYPOINT ["hey"]
\ No newline at end of file
diff --git a/serving/samples/autoscale-go/request-dashboard.png b/serving/samples/autoscale-go/request-dashboard.png
new file mode 100644
index 000000000..40e220980
Binary files /dev/null and b/serving/samples/autoscale-go/request-dashboard.png differ
diff --git a/serving/samples/autoscale-go/sample.yaml b/serving/samples/autoscale-go/sample.yaml
deleted file mode 100644
index fe2cea885..000000000
--- a/serving/samples/autoscale-go/sample.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright 2018 The Knative Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: serving.knative.dev/v1alpha1
-kind: Configuration
-metadata:
-  name: autoscale-configuration
-  namespace: default
-spec:
-  revisionTemplate:
-    metadata:
-      labels:
-        knative.dev/type: app
-    spec:
-      container:
-        # This is the Go import path for the binary to containerize
-        # and substitute here.
-        image: github.com/knative/docs/serving/samples/autoscale-go
-        # When scaling up Knative controller doesn't have a way to
-        # know if the user container is good for serving. Having a
-        # readiness probe prevents traffic to be routed to a pod
-        # before the user container is ready.
-        readinessProbe:
-          httpGet:
-            path: "primes/4"
-          periodSeconds: 2
----
-apiVersion: serving.knative.dev/v1alpha1
-kind: Route
-metadata:
-  name: autoscale-route
-  namespace: default
-spec:
-  traffic:
-  - configurationName: autoscale-configuration
-    percent: 100
diff --git a/serving/samples/autoscale-go/scale-dashboard.png b/serving/samples/autoscale-go/scale-dashboard.png
new file mode 100644
index 000000000..b249a6a6d
Binary files /dev/null and b/serving/samples/autoscale-go/scale-dashboard.png differ
diff --git a/serving/samples/autoscale-go/service.yaml b/serving/samples/autoscale-go/service.yaml
new file mode 100644
index 000000000..299e73036
--- /dev/null
+++ b/serving/samples/autoscale-go/service.yaml
@@ -0,0 +1,25 @@
+# Copyright 2018 The Knative Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: serving.knative.dev/v1alpha1
+kind: Service
+metadata:
+  name: autoscale-go
+  namespace: default
+spec:
+  runLatest:
+    configuration:
+      revisionTemplate:
+        spec:
+          container:
+            image: github.com/knative/docs/serving/samples/autoscale-go
diff --git a/serving/samples/autoscale-go/test/test.go b/serving/samples/autoscale-go/test/test.go
new file mode 100644
index 000000000..703aa73ff
--- /dev/null
+++ b/serving/samples/autoscale-go/test/test.go
@@ -0,0 +1,134 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"net/http"
+	"os"
+	"sync/atomic"
+	"time"
+)
+
+var (
+	sleep       = flag.Int("sleep", 0, "milliseconds to sleep")
+	prime       = flag.Int("prime", 0, "calculate largest prime less than")
+	bloat       = flag.Int("bloat", 0, "mb of memory to consume")
+	ip          = flag.String("ip", "", "ip address of knative ingress")
+	port        = flag.String("port", "80", "port of call")
+	host        = flag.String("host", "autoscale-go.default.example.com", "host name of revision under test")
+	qps         = flag.Int("qps", 10, "max requests per second")
+	concurrency = flag.Int("concurrency", 10, "max in-flight requests")
+	duration    = flag.Duration("duration", time.Minute, "duration of the test")
+	verbose     = flag.Bool("verbose", false, "verbose output for debugging")
+)
+
+type result struct {
+	success    bool
+	statusCode int
+	latency    int64
+}
+
+func get(url string, client *http.Client, report chan *result) {
+	start := time.Now()
+	result := &result{}
+	defer func() {
+		end := time.Now()
+		result.latency = end.UnixNano() - start.UnixNano()
+		report <- result
+	}()
+
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		if *verbose {
+			fmt.Printf("%v\n", err)
+		}
+		return
+	}
+	req.Host = *host
+	res, err := client.Do(req)
+	if err != nil {
+		if *verbose {
+			fmt.Printf("%v\n", err)
+		}
+		return
+	}
+	// Close the response body so connections are not leaked during
+	// the load test.
+	defer res.Body.Close()
+	result.statusCode = res.StatusCode
+	if result.statusCode != http.StatusOK {
+		if *verbose {
+			fmt.Printf("%+v\n", res)
+		}
+		return
+	}
+	result.success = true
+}
+
+func reporter(stopCh <-chan time.Time, report chan *result, inflight *int64) {
+	tickerCh := time.NewTicker(time.Second).C
+	var (
+		total       int64
+		count       int64
+		nanoseconds int64
+		successful  int64
+	)
+	fmt.Println("REQUEST STATS:")
+	for {
+		select {
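+		// Once per second, print and reset the windowed stats;
+		// otherwise fold each finished request into the counters.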
+		case <-stopCh:
+			return
+		case <-tickerCh:
+			fmt.Printf("Total: %v\tInflight: %v\tDone: %v ", total, atomic.LoadInt64(inflight), count)
+			if count > 0 {
+				fmt.Printf("\tSuccess Rate: %.2f%%\tAvg Latency: %.4f sec\n", float64(successful)/float64(count)*100, float64(nanoseconds)/float64(count)/(1000000000))
+			} else {
+				fmt.Printf("\n")
+			}
+			count = 0
+			nanoseconds = 0
+			successful = 0
+		case r := <-report:
+			total++
+			count++
+			nanoseconds += r.latency
+			if r.success {
+				successful++
+			}
+		}
+	}
+}
+
+func main() {
+	flag.Parse()
+	if *ip == "" {
+		ipAddress := os.Getenv("IP_ADDRESS")
+		ip = &ipAddress
+	}
+	if *ip == "" {
+		panic("need either $IP_ADDRESS env var or --ip flag")
+	}
+	url := fmt.Sprintf(
+		"http://%v:%v?sleep=%v&prime=%v&bloat=%v",
+		*ip, *port, *sleep, *prime, *bloat)
+	client := &http.Client{}
+
+	stopCh := time.After(*duration)
+	report := make(chan *result, 10000)
+	var inflight int64
+
+	// Give the reporter its own timer: a single time.After channel
+	// delivers exactly one value, so sharing stopCh between the
+	// reporter and the loop below would stop only one of them.
+	go reporter(time.After(*duration), report, &inflight)
+
+	qpsCh := time.NewTicker(time.Duration(time.Second.Nanoseconds() / int64(*qps))).C
+	for {
+		select {
+		case <-stopCh:
+			return
+		case <-qpsCh:
+			if atomic.LoadInt64(&inflight) < int64(*concurrency) {
+				atomic.AddInt64(&inflight, 1)
+				go func() {
+					get(url, client, report)
+					atomic.AddInt64(&inflight, -1)
+				}()
+			}
+		}
+	}
+}
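+
+// Example invocations, with illustrative flag values (run from the
+// repository root, with IP_ADDRESS exported or -ip set):
+//
+//	go run serving/samples/autoscale-go/test/test.go -qps 100 -concurrency 50
+//	go run serving/samples/autoscale-go/test/test.go -ip 1.2.3.4 -duration 5m -sleep 200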