diff --git a/serving/samples/autoscale-go/README.md b/serving/samples/autoscale-go/README.md
index af37f8923..e27ea0197 100644
--- a/serving/samples/autoscale-go/README.md
+++ b/serving/samples/autoscale-go/README.md
@@ -1,10 +1,11 @@
 # Autoscale Sample
 
-A demonstration of the autoscaling capabilities of an Knative Serving Revision.
+A demonstration of the autoscaling capabilities of a Knative Serving Revision.
 
 ## Prerequisites
 
 1. A Kubernetes cluster with [Knative Serving](https://github.com/knative/docs/blob/master/install/README.md) installed.
+1. A [metrics installation](https://github.com/knative/docs/blob/master/serving/installing-logging-metrics-traces.md) for viewing scaling graphs (optional).
 1. Install [Docker](https://docs.docker.com/get-started/#prepare-your-docker-environment).
 1. Check out the code:
 ```
@@ -51,54 +52,106 @@ Build the application container and publish it to a container registry:
 1. Deploy the Knative Serving sample:
 ```
-   kubectl apply -f serving/samples/autoscale-go/sample.yaml
+   kubectl apply -f serving/samples/autoscale-go/service.yaml
 ```
 
-1. Find the ingress hostname and IP and export as an environment variable:
+1. Find the ingress gateway IP address and export it as an environment variable:
 ```
-   export SERVICE_HOST=`kubectl get route autoscale-route -o jsonpath="{.status.domain}"`
-   export SERVICE_IP=`kubectl get svc knative-ingressgateway -n istio-system -o jsonpath="{.status.loadBalancer.ingress[*].ip}"`
+   export IP_ADDRESS=`kubectl get svc knative-ingressgateway -n istio-system -o jsonpath="{.status.loadBalancer.ingress[*].ip}"`
 ```
 
 ## View the Autoscaling Capabilities
 
-1. Request the largest prime less than 40,000,000 from the autoscale app. Note that it consumes about 1 cpu/sec.
+1. Make a request to the autoscale app to see it consume some resources.
 ```
-   time curl --header "Host:$SERVICE_HOST" http://${SERVICE_IP?}/primes/40000000
+   curl --header "Host: autoscale-go.default.example.com" "http://${IP_ADDRESS?}?sleep=100&prime=1000000&bloat=50"
+   ```
+   ```
+   Allocated 50 Mb of memory.
+   The largest prime less than 1000000 is 999983.
+   Slept for 100.13 milliseconds.
 ```
 
-1. Ramp up traffic on the autoscale app (about 300 QPS):
+1. Ramp up traffic to maintain 10 in-flight requests.
+
 ```
-   kubectl delete namespace hey --ignore-not-found && kubectl create namespace hey
+   go run serving/samples/autoscale-go/test/test.go -sleep 100 -prime 1000000 -bloat 50 -qps 9999 -concurrency 10
 ```
 ```
-   for i in `seq 2 2 60`; do
-     kubectl -n hey run hey-$i --image josephburnett/hey --restart Never -- \
-       -n 999999 -c $i -z 2m -host $SERVICE_HOST \
-       "http://${SERVICE_IP?}/primes/40000000"
-     sleep 1
-   done
+   REQUEST STATS:
+   Total: 34 Inflight: 10 Done: 34 Success Rate: 100.00% Avg Latency: 0.2584 sec
+   Total: 69 Inflight: 10 Done: 35 Success Rate: 100.00% Avg Latency: 0.2750 sec
+   Total: 108 Inflight: 10 Done: 39 Success Rate: 100.00% Avg Latency: 0.2598 sec
+   Total: 148 Inflight: 10 Done: 40 Success Rate: 100.00% Avg Latency: 0.2565 sec
+   Total: 185 Inflight: 10 Done: 37 Success Rate: 100.00% Avg Latency: 0.2624 sec
+   ...
 ```
+   > Note: Use CTRL+C to exit the load test.
 
 1. Watch the Knative Serving deployment pod count increase.
 ```
    kubectl get deploy --watch
 ```
    > Note: Use CTRL+C to exit watch mode.
-
-1. Watch the pod traffic ramp up.
+
+## Analysis
+
+### Algorithm
+
+Knative Serving autoscaling is based on the average number of in-flight requests per pod (concurrency). The system has a default [target concurrency of 1.0](https://github.com/knative/serving/blob/5441a18b360805d261528b2ac8ac13124e826946/config/config-autoscaler.yaml#L27).
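+
+In rough pseudocode, the rule looks like the sketch below. This is illustrative only, not the autoscaler's actual code (the real implementation also averages metrics over a sliding window and handles sudden traffic spikes); `desiredPods` is a hypothetical name.
+
+```
+// Illustrative sketch: observed concurrency across the Revision,
+// divided by the per-pod target, rounded up.
+func desiredPods(observedConcurrency, targetConcurrency float64) int {
+	return int(math.Ceil(observedConcurrency / targetConcurrency))
+}
+```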
+
+For example, if a Revision is receiving 35 requests per second, each of which takes about 0.25 seconds, Knative Serving will determine that the Revision needs about 9 pods:
+
+```
+35 * .25 = 8.75
+ceil(8.75) = 9
+```
+
+### Dashboards
+
+View the Knative Serving Scaling and Request dashboards (if configured).
+
+```
+kubectl port-forward -n monitoring $(kubectl get pods -n monitoring --selector=app=grafana --output=jsonpath="{.items..metadata.name}") 3000
+```
+
+![scale dashboard](scale-dashboard.png)
+
+![request dashboard](request-dashboard.png)
+
+### Other Experiments
+
+1. Maintain 100 concurrent requests.
 ```
-   kubectl get pods -n hey --show-all --watch
+   go run serving/samples/autoscale-go/test/test.go -qps 9999 -concurrency 100
 ```
-1. Look at the latency, requests/sec and success rate of each pod.
+1. Maintain 100 qps with fast requests.
 ```
-   for i in `seq 2 2 60`; do kubectl -n hey logs hey-$i ; done
+   go run serving/samples/autoscale-go/test/test.go -qps 100 -concurrency 9999
+   ```
+
+1. Maintain 100 qps with slow requests.
+   ```
+   go run serving/samples/autoscale-go/test/test.go -qps 100 -concurrency 9999 -sleep 500
+   ```
+
+1. Heavy CPU usage.
+   ```
+   go run serving/samples/autoscale-go/test/test.go -qps 9999 -concurrency 10 -prime 40000000
+   ```
+
+1. Heavy memory usage.
+   ```
+   go run serving/samples/autoscale-go/test/test.go -qps 9999 -concurrency 5 -bloat 1000
 ```
 
 ## Cleanup
 ```
-kubectl delete namespace hey
-kubectl delete -f serving/samples/autoscale-go/sample.yaml
+kubectl delete -f serving/samples/autoscale-go/service.yaml
 ```
+
+## Further reading
+
+1. [Autoscaling Developer Documentation](https://github.com/knative/serving/blob/master/docs/scaling/DEVELOPMENT.md)
diff --git a/serving/samples/autoscale-go/autoscale.go b/serving/samples/autoscale-go/autoscale.go
index b2934cb41..d241ce2e1 100644
--- a/serving/samples/autoscale-go/autoscale.go
+++ b/serving/samples/autoscale-go/autoscale.go
@@ -16,16 +16,18 @@ limitations under the License.
 package main
 
 import (
-	"encoding/json"
+	"fmt"
 	"math"
 	"net/http"
 	"strconv"
+	"sync"
+	"time"
 )
 
 // Algorithm from https://stackoverflow.com/a/21854246
 // Only primes less than or equal to N will be generated
-func primes(N int) []int {
+func allPrimes(N int) []int {
 	var x, y, n int
 	nsqrt := math.Sqrt(float64(N))
@@ -71,22 +73,87 @@
 	return primes
 }
 
-const primesPath = "/primes/"
+func bloat(mb int) string {
+	b := make([]byte, mb*1024*1024)
+	// Touch the first and last byte so the allocation is actually
+	// backed by physical memory.
+	b[0] = 1
+	b[len(b)-1] = 1
+	return fmt.Sprintf("Allocated %v Mb of memory.\n", mb)
+}
+
+func prime(max int) string {
+	p := allPrimes(max)
+	if len(p) > 0 {
+		return fmt.Sprintf("The largest prime less than %v is %v.\n", max, p[len(p)-1])
+	} else {
+		return fmt.Sprintf("There are no primes smaller than %v.\n", max)
+	}
+}
+
+func sleep(ms int) string {
+	start := time.Now().UnixNano()
+	time.Sleep(time.Duration(ms) * time.Millisecond)
+	end := time.Now().UnixNano()
+	return fmt.Sprintf("Slept for %.2f milliseconds.\n", float64(end-start)/1000000)
+}
+
+func parseIntParam(r *http.Request, param string) (int, bool, error) {
+	if value := r.URL.Query().Get(param); value != "" {
+		i, err := strconv.Atoi(value)
+		if err != nil {
+			return 0, false, err
+		}
+		if i == 0 {
+			return i, false, nil
+		}
+		return i, true, nil
+	}
+	return 0, false, nil
+}
 
 func handler(w http.ResponseWriter, r *http.Request) {
-	w.Header().Set("Content-Type", "application/json")
-	param := r.URL.Path[len(primesPath):]
-	n, err := strconv.Atoi(param)
+	// Validate inputs.
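+	// Each query parameter is optional: parseIntParam reports a
+	// missing or zero value as "not set", so only the requested
+	// kinds of work run below.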
+	ms, hasMs, err := parseIntParam(r, "sleep")
 	if err != nil {
-		w.WriteHeader(http.StatusBadRequest)
-	} else {
-		w.WriteHeader(http.StatusOK)
-		p := primes(n)
-		json.NewEncoder(w).Encode(p[len(p)-1:])
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	max, hasMax, err := parseIntParam(r, "prime")
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	mb, hasMb, err := parseIntParam(r, "bloat")
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	// Consume time, cpu and memory in parallel. http.ResponseWriter
+	// is not safe for concurrent use, so do the work concurrently
+	// but serialize the writes with a mutex.
+	var wg sync.WaitGroup
+	defer wg.Wait()
+	var mu sync.Mutex
+	if hasMs {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			msg := sleep(ms)
+			mu.Lock()
+			defer mu.Unlock()
+			fmt.Fprint(w, msg)
+		}()
+	}
+	if hasMax {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			msg := prime(max)
+			mu.Lock()
+			defer mu.Unlock()
+			fmt.Fprint(w, msg)
+		}()
+	}
+	if hasMb {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			msg := bloat(mb)
+			mu.Lock()
+			defer mu.Unlock()
+			fmt.Fprint(w, msg)
+		}()
 	}
 }
 
 func main() {
-	http.HandleFunc(primesPath, handler)
+	http.HandleFunc("/", handler)
 	http.ListenAndServe(":8080", nil)
 }
diff --git a/serving/samples/autoscale-go/hey/Dockerfile b/serving/samples/autoscale-go/hey/Dockerfile
deleted file mode 100644
index e908d16d0..000000000
--- a/serving/samples/autoscale-go/hey/Dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-FROM golang
-
-RUN go get -u github.com/rakyll/hey
-
-ENTRYPOINT ["hey"]
\ No newline at end of file
diff --git a/serving/samples/autoscale-go/request-dashboard.png b/serving/samples/autoscale-go/request-dashboard.png
new file mode 100644
index 000000000..40e220980
Binary files /dev/null and b/serving/samples/autoscale-go/request-dashboard.png differ
diff --git a/serving/samples/autoscale-go/sample.yaml b/serving/samples/autoscale-go/sample.yaml
deleted file mode 100644
index fe2cea885..000000000
--- a/serving/samples/autoscale-go/sample.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright 2018 The Knative Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: serving.knative.dev/v1alpha1
-kind: Configuration
-metadata:
-  name: autoscale-configuration
-  namespace: default
-spec:
-  revisionTemplate:
-    metadata:
-      labels:
-        knative.dev/type: app
-    spec:
-      container:
-        # This is the Go import path for the binary to containerize
-        # and substitute here.
-        image: github.com/knative/docs/serving/samples/autoscale-go
-        # When scaling up Knative controller doesn't have a way to
-        # know if the user container is good for serving. Having a
-        # readiness probe prevents traffic to be routed to a pod
-        # before the user container is ready.
-        readinessProbe:
-          httpGet:
-            path: "primes/4"
-          periodSeconds: 2
----
-apiVersion: serving.knative.dev/v1alpha1
-kind: Route
-metadata:
-  name: autoscale-route
-  namespace: default
-spec:
-  traffic:
-  - configurationName: autoscale-configuration
-    percent: 100
diff --git a/serving/samples/autoscale-go/scale-dashboard.png b/serving/samples/autoscale-go/scale-dashboard.png
new file mode 100644
index 000000000..b249a6a6d
Binary files /dev/null and b/serving/samples/autoscale-go/scale-dashboard.png differ
diff --git a/serving/samples/autoscale-go/service.yaml b/serving/samples/autoscale-go/service.yaml
new file mode 100644
index 000000000..299e73036
--- /dev/null
+++ b/serving/samples/autoscale-go/service.yaml
@@ -0,0 +1,25 @@
+# Copyright 2018 The Knative Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: serving.knative.dev/v1alpha1
+kind: Service
+metadata:
+  name: autoscale-go
+  namespace: default
+spec:
+  runLatest:
+    configuration:
+      revisionTemplate:
+        spec:
+          container:
+            image: github.com/knative/docs/serving/samples/autoscale-go
diff --git a/serving/samples/autoscale-go/test/test.go b/serving/samples/autoscale-go/test/test.go
new file mode 100644
index 000000000..703aa73ff
--- /dev/null
+++ b/serving/samples/autoscale-go/test/test.go
@@ -0,0 +1,134 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"net/http"
+	"os"
+	"sync/atomic"
+	"time"
+)
+
+var (
+	sleep       = flag.Int("sleep", 0, "milliseconds to sleep")
+	prime       = flag.Int("prime", 0, "calculate largest prime less than")
+	bloat       = flag.Int("bloat", 0, "mb of memory to consume")
+	ip          = flag.String("ip", "", "ip address of knative ingress")
+	port        = flag.String("port", "80", "port of call")
+	host        = flag.String("host", "autoscale-go.default.example.com", "host name of revision under test")
+	qps         = flag.Int("qps", 10, "max requests per second")
+	concurrency = flag.Int("concurrency", 10, "max in-flight requests")
+	duration    = flag.Duration("duration", time.Minute, "duration of the test")
+	verbose     = flag.Bool("verbose", false, "verbose output for debugging")
+)
+
+type result struct {
+	success    bool
+	statusCode int
+	latency    int64
+}
+
+func get(url string, client *http.Client, report chan *result) {
+	start := time.Now()
+	result := &result{}
+	defer func() {
+		end := time.Now()
+		result.latency = end.UnixNano() - start.UnixNano()
+		report <- result
+	}()
+
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		if *verbose {
+			fmt.Printf("%v\n", err)
+		}
+		return
+	}
+	req.Host = *host
+	res, err := client.Do(req)
+	if err != nil {
+		if *verbose {
+			fmt.Printf("%v\n", err)
+		}
+		return
+	}
+	// Close the response body so connections are not leaked during
+	// the load test.
+	defer res.Body.Close()
+	result.statusCode = res.StatusCode
+	if result.statusCode != http.StatusOK {
+		if *verbose {
+			fmt.Printf("%+v\n", res)
+		}
+		return
+	}
+	result.success = true
+}
+
+func reporter(stopCh <-chan time.Time, report chan *result, inflight *int64) {
+	tickerCh := time.NewTicker(time.Second).C
+	var (
+		total       int64
+		count       int64
+		nanoseconds int64
+		successful  int64
+	)
+	fmt.Println("REQUEST STATS:")
+	for {
+		select {
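+		// Once per second, print and reset the windowed stats;
+		// otherwise fold each finished request into the counters.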
+		case <-stopCh:
+			return
+		case <-tickerCh:
+			fmt.Printf("Total: %v\tInflight: %v\tDone: %v ", total, atomic.LoadInt64(inflight), count)
+			if count > 0 {
+				fmt.Printf("\tSuccess Rate: %.2f%%\tAvg Latency: %.4f sec\n", float64(successful)/float64(count)*100, float64(nanoseconds)/float64(count)/(1000000000))
+			} else {
+				fmt.Printf("\n")
+			}
+			count = 0
+			nanoseconds = 0
+			successful = 0
+		case r := <-report:
+			total++
+			count++
+			nanoseconds += r.latency
+			if r.success {
+				successful++
+			}
+		}
+	}
+}
+
+func main() {
+	flag.Parse()
+	if *ip == "" {
+		ipAddress := os.Getenv("IP_ADDRESS")
+		ip = &ipAddress
+	}
+	if *ip == "" {
+		panic("need either $IP_ADDRESS env var or --ip flag")
+	}
+	url := fmt.Sprintf(
+		"http://%v:%v?sleep=%v&prime=%v&bloat=%v",
+		*ip, *port, *sleep, *prime, *bloat)
+	client := &http.Client{}
+
+	stopCh := time.After(*duration)
+	report := make(chan *result, 10000)
+	var inflight int64
+
+	// Give the reporter its own timer: a single time.After channel
+	// delivers exactly one value, so sharing stopCh between the
+	// reporter and the loop below would stop only one of them.
+	go reporter(time.After(*duration), report, &inflight)
+
+	qpsCh := time.NewTicker(time.Duration(time.Second.Nanoseconds() / int64(*qps))).C
+	for {
+		select {
+		case <-stopCh:
+			return
+		case <-qpsCh:
+			if atomic.LoadInt64(&inflight) < int64(*concurrency) {
+				atomic.AddInt64(&inflight, 1)
+				go func() {
+					get(url, client, report)
+					atomic.AddInt64(&inflight, -1)
+				}()
+			}
+		}
+	}
+}
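+
+// Example invocations, with illustrative flag values (run from the
+// repository root, with IP_ADDRESS exported or -ip set):
+//
+//	go run serving/samples/autoscale-go/test/test.go -qps 100 -concurrency 50
+//	go run serving/samples/autoscale-go/test/test.go -ip 1.2.3.4 -duration 5m -sleep 200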