vendor: update import paths to "gonum.org/v1"

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-09 12:37:46 -08:00 · 2018-01-09 12:37:46 -08:00 · e7294311a0
parent f4955798bc
commit e7294311a0
302 changed files with 10101 additions and 4365 deletions
--- a/Gopkg.lock
+++ b/Gopkg.lock
@ -99,70 +99,6 @@
  revision = "1e59b77b52bf8e4b449a57e6f79f21226d571845"
  source = "https://github.com/golang/protobuf"

-[[projects]]
-  branch = "master"
-  name = "github.com/gonum/blas"
-  packages = [
-    ".",
-    "blas64",
-    "native",
-    "native/internal/math32"
-  ]
-  revision = "37e82626499e1df7c54aeaba0959fd6e7e8dc1e4"
-
-[[projects]]
-  branch = "master"
-  name = "github.com/gonum/floats"
-  packages = ["."]
-  revision = "f74b330d45c56584a6ea7a27f5c64ea2900631e9"
-
-[[projects]]
-  branch = "master"
-  name = "github.com/gonum/internal"
-  packages = [
-    "asm/f32",
-    "asm/f64"
-  ]
-  revision = "e57e4534cf9b3b00ef6c0175f59d8d2d34f60914"
-
-[[projects]]
-  branch = "master"
-  name = "github.com/gonum/lapack"
-  packages = [
-    ".",
-    "lapack64",
-    "native"
-  ]
-  revision = "5ed4b826becd1807e09377508f51756586d1a98c"
-
-[[projects]]
-  branch = "master"
-  name = "github.com/gonum/matrix"
-  packages = [
-    ".",
-    "mat64"
-  ]
-  revision = "dd6034299e4242c9f0ea36735e6d4264dfcb3f9f"
-
-[[projects]]
-  name = "github.com/gonum/plot"
-  packages = [
-    ".",
-    "palette",
-    "plotter",
-    "plotutil",
-    "tools/bezier",
-    "vg",
-    "vg/draw",
-    "vg/fonts",
-    "vg/vgeps",
-    "vg/vgimg",
-    "vg/vgpdf",
-    "vg/vgsvg"
-  ]
-  revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
-  source = "https://github.com/gonum/plot"
-
 [[projects]]
  name = "github.com/googleapis/gax-go"
  packages = ["."]
@ -344,6 +280,44 @@
  revision = "6dc17368e09b0e8634d71cac8168d853e869a0c7"
  source = "https://github.com/golang/time"

+[[projects]]
+  branch = "master"
+  name = "gonum.org/v1/gonum"
+  packages = [
+    "blas",
+    "blas/blas64",
+    "blas/gonum",
+    "floats",
+    "internal/asm/c128",
+    "internal/asm/f32",
+    "internal/asm/f64",
+    "internal/math32",
+    "lapack",
+    "lapack/gonum",
+    "lapack/lapack64",
+    "mat"
+  ]
+  revision = "69fc04c7c31754cf196d3bcc70c3bc0eec9da1b7"
+
+[[projects]]
+  name = "gonum.org/v1/plot"
+  packages = [
+    ".",
+    "palette",
+    "plotter",
+    "plotutil",
+    "tools/bezier",
+    "vg",
+    "vg/draw",
+    "vg/fonts",
+    "vg/vgeps",
+    "vg/vgimg",
+    "vg/vgpdf",
+    "vg/vgsvg"
+  ]
+  revision = "feab214a240f4312b98ab52baf662b55ff1ee377"
+  source = "https://github.com/gonum/plot"
+
 [[projects]]
  name = "google.golang.org/api"
  packages = [
@ -421,6 +395,6 @@
 [solve-meta]
  analyzer-name = "dep"
  analyzer-version = 1
-  inputs-digest = "057a961c21585211116e57f99b60e5bdd28cfe46411715ec4e13dbcf12ddd876"
+  inputs-digest = "21abf25cf507cffab0266c5836fb8acc3cb14f96b0f3fba49e0c55b85fedf5e5"
  solver-name = "gps-cdcl"
  solver-version = 1
--- a/Gopkg.toml
+++ b/Gopkg.toml
@ -88,9 +88,9 @@


 [[constraint]]
-  name = "github.com/gonum/plot"
+  name = "gonum.org/v1/plot"
  source = "https://github.com/gonum/plot"
-  revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
+  revision = "feab214a240f4312b98ab52baf662b55ff1ee377"


 [[constraint]]
--- a/analyze/05_plot.go
+++ b/analyze/05_plot.go
@ -19,11 +19,11 @@ import (

 	"github.com/coreos/dbtester/dbtesterpb"

-	"github.com/gonum/plot"
-	"github.com/gonum/plot/plotter"
-	"github.com/gonum/plot/plotutil"
-	"github.com/gonum/plot/vg"
 	"github.com/gyuho/dataframe"
+	"gonum.org/v1/plot"
+	"gonum.org/v1/plot/plotter"
+	"gonum.org/v1/plot/plotutil"
+	"gonum.org/v1/plot/vg"
 )

 var (
--- a/dbtesterpb/util.go
+++ b/dbtesterpb/util.go
@ -18,7 +18,7 @@ import (
 	"image/color"
 	"sort"

-	"github.com/gonum/plot/plotutil"
+	"gonum.org/v1/plot/plotutil"
 )

 // IsValidDatabaseID returns false if the database id is not supported.
--- a/vendor/github.com/gonum/blas/native/general_double.go
+++ b/vendor/github.com/gonum/blas/native/general_double.go
@ -1,155 +0,0 @@
-// Copyright ©2014 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package native
-
-import (
-	"errors"
-	"fmt"
-	"math"
-)
-
-func newGeneral64(r, c int) general64 {
-	return general64{
-		data:   make([]float64, r*c),
-		rows:   r,
-		cols:   c,
-		stride: c,
-	}
-}
-
-type general64 struct {
-	data       []float64
-	rows, cols int
-	stride     int
-}
-
-// adds element-wise into receiver. rows and columns must match
-func (g general64) add(h general64) {
-	if debug {
-		if g.rows != h.rows {
-			panic("blas: row size mismatch")
-		}
-		if g.cols != h.cols {
-			panic("blas: col size mismatch")
-		}
-	}
-	for i := 0; i < g.rows; i++ {
-		gtmp := g.data[i*g.stride : i*g.stride+g.cols]
-		for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
-			gtmp[j] += v
-		}
-	}
-}
-
-// at returns the value at the ith row and jth column. For speed reasons, the
-// rows and columns are not bounds checked.
-func (g general64) at(i, j int) float64 {
-	if debug {
-		if i < 0 || i >= g.rows {
-			panic("blas: row out of bounds")
-		}
-		if j < 0 || j >= g.cols {
-			panic("blas: col out of bounds")
-		}
-	}
-	return g.data[i*g.stride+j]
-}
-
-func (g general64) check(c byte) error {
-	if g.rows < 0 {
-		return errors.New("blas: rows < 0")
-	}
-	if g.cols < 0 {
-		return errors.New("blas: cols < 0")
-	}
-	if g.stride < 1 {
-		return errors.New("blas: stride < 1")
-	}
-	if g.stride < g.cols {
-		return errors.New("blas: illegal stride")
-	}
-	if (g.rows-1)*g.stride+g.cols > len(g.data) {
-		return fmt.Errorf("blas: index of %c out of range", c)
-	}
-	return nil
-}
-
-func (g general64) clone() general64 {
-	data := make([]float64, len(g.data))
-	copy(data, g.data)
-	return general64{
-		data:   data,
-		rows:   g.rows,
-		cols:   g.cols,
-		stride: g.stride,
-	}
-}
-
-// assumes they are the same size
-func (g general64) copy(h general64) {
-	if debug {
-		if g.rows != h.rows {
-			panic("blas: row mismatch")
-		}
-		if g.cols != h.cols {
-			panic("blas: col mismatch")
-		}
-	}
-	for k := 0; k < g.rows; k++ {
-		copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
-	}
-}
-
-func (g general64) equal(a general64) bool {
-	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
-		return false
-	}
-	for i, v := range g.data {
-		if a.data[i] != v {
-			return false
-		}
-	}
-	return true
-}
-
-/*
-// print is to aid debugging. Commented out to avoid fmt import
-func (g general64) print() {
-	fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
-	for i := 0; i < g.rows; i++ {
-		fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
-	}
-
-}
-*/
-
-func (g general64) view(i, j, r, c int) general64 {
-	if debug {
-		if i < 0 || i+r > g.rows {
-			panic("blas: row out of bounds")
-		}
-		if j < 0 || j+c > g.cols {
-			panic("blas: col out of bounds")
-		}
-	}
-	return general64{
-		data:   g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
-		rows:   r,
-		cols:   c,
-		stride: g.stride,
-	}
-}
-
-func (g general64) equalWithinAbs(a general64, tol float64) bool {
-	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
-		return false
-	}
-	for i, v := range g.data {
-		if math.Abs(a.data[i]-v) > tol {
-			return false
-		}
-	}
-	return true
-}
--- a/vendor/github.com/gonum/blas/native/general_single.go
+++ b/vendor/github.com/gonum/blas/native/general_single.go
@ -1,157 +0,0 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
-
-// Copyright ©2014 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package native
-
-import (
-	"errors"
-	"fmt"
-	math "github.com/gonum/blas/native/internal/math32"
-)
-
-func newGeneral32(r, c int) general32 {
-	return general32{
-		data:   make([]float32, r*c),
-		rows:   r,
-		cols:   c,
-		stride: c,
-	}
-}
-
-type general32 struct {
-	data       []float32
-	rows, cols int
-	stride     int
-}
-
-// adds element-wise into receiver. rows and columns must match
-func (g general32) add(h general32) {
-	if debug {
-		if g.rows != h.rows {
-			panic("blas: row size mismatch")
-		}
-		if g.cols != h.cols {
-			panic("blas: col size mismatch")
-		}
-	}
-	for i := 0; i < g.rows; i++ {
-		gtmp := g.data[i*g.stride : i*g.stride+g.cols]
-		for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
-			gtmp[j] += v
-		}
-	}
-}
-
-// at returns the value at the ith row and jth column. For speed reasons, the
-// rows and columns are not bounds checked.
-func (g general32) at(i, j int) float32 {
-	if debug {
-		if i < 0 || i >= g.rows {
-			panic("blas: row out of bounds")
-		}
-		if j < 0 || j >= g.cols {
-			panic("blas: col out of bounds")
-		}
-	}
-	return g.data[i*g.stride+j]
-}
-
-func (g general32) check(c byte) error {
-	if g.rows < 0 {
-		return errors.New("blas: rows < 0")
-	}
-	if g.cols < 0 {
-		return errors.New("blas: cols < 0")
-	}
-	if g.stride < 1 {
-		return errors.New("blas: stride < 1")
-	}
-	if g.stride < g.cols {
-		return errors.New("blas: illegal stride")
-	}
-	if (g.rows-1)*g.stride+g.cols > len(g.data) {
-		return fmt.Errorf("blas: index of %c out of range", c)
-	}
-	return nil
-}
-
-func (g general32) clone() general32 {
-	data := make([]float32, len(g.data))
-	copy(data, g.data)
-	return general32{
-		data:   data,
-		rows:   g.rows,
-		cols:   g.cols,
-		stride: g.stride,
-	}
-}
-
-// assumes they are the same size
-func (g general32) copy(h general32) {
-	if debug {
-		if g.rows != h.rows {
-			panic("blas: row mismatch")
-		}
-		if g.cols != h.cols {
-			panic("blas: col mismatch")
-		}
-	}
-	for k := 0; k < g.rows; k++ {
-		copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
-	}
-}
-
-func (g general32) equal(a general32) bool {
-	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
-		return false
-	}
-	for i, v := range g.data {
-		if a.data[i] != v {
-			return false
-		}
-	}
-	return true
-}
-
-/*
-// print is to aid debugging. Commented out to avoid fmt import
-func (g general32) print() {
-	fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
-	for i := 0; i < g.rows; i++ {
-		fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
-	}
-
-}
-*/
-
-func (g general32) view(i, j, r, c int) general32 {
-	if debug {
-		if i < 0 || i+r > g.rows {
-			panic("blas: row out of bounds")
-		}
-		if j < 0 || j+c > g.cols {
-			panic("blas: col out of bounds")
-		}
-	}
-	return general32{
-		data:   g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
-		rows:   r,
-		cols:   c,
-		stride: g.stride,
-	}
-}
-
-func (g general32) equalWithinAbs(a general32, tol float32) bool {
-	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
-		return false
-	}
-	for i, v := range g.data {
-		if math.Abs(a.data[i]-v) > tol {
-			return false
-		}
-	}
-	return true
-}
--- a/vendor/github.com/gonum/internal/asm/f32/ddot.go
+++ b/vendor/github.com/gonum/internal/asm/f32/ddot.go
@ -1,33 +0,0 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package f32
-
-// DdotUnitary is
-//   for i, v := range x {
-//   	sum += float64(y[i]) * float64(v)
-//   }
-//   return
-func DdotUnitary(x, y []float32) (sum float64) {
-	for i, v := range x {
-		sum += float64(y[i]) * float64(v)
-	}
-	return
-}
-
-// DdotInc is
-//  for i := 0; i < int(n); i++ {
-//  	sum += float64(y[iy]) * float64(x[ix])
-//  	ix += incX
-//  	iy += incY
-//  }
-//  return
-func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
-	for i := 0; i < int(n); i++ {
-		sum += float64(y[iy]) * float64(x[ix])
-		ix += incX
-		iy += incY
-	}
-	return
-}
--- a/vendor/github.com/gonum/internal/asm/f32/dot.go
+++ b/vendor/github.com/gonum/internal/asm/f32/dot.go
@ -1,33 +0,0 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package f32
-
-// DotUnitary is
-//  for i, v := range x {
-//  	sum += y[i] * v
-//  }
-//  return sum
-func DotUnitary(x, y []float32) (sum float32) {
-	for i, v := range x {
-		sum += y[i] * v
-	}
-	return sum
-}
-
-// DotInc is
-//  for i := 0; i < int(n); i++ {
-//  	sum += y[iy] * x[ix]
-//  	ix += incX
-//  	iy += incY
-//  }
-//  return sum
-func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
-	for i := 0; i < int(n); i++ {
-		sum += y[iy] * x[ix]
-		ix += incX
-		iy += incY
-	}
-	return sum
-}
--- a/vendor/github.com/gonum/matrix/doc.go
+++ b/vendor/github.com/gonum/matrix/doc.go
@ -1,103 +0,0 @@
-// Generated by running
-//  go generate github.com/gonum/matrix
-// DO NOT EDIT.
-
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package matrix provides common error handling mechanisms for matrix operations
-// in mat64 and cmat128.
-//
-// Overview
-//
-// This section provides a quick overview of the matrix package. The following
-// sections provide more in depth commentary.
-//
-// matrix provides:
-//  - Error type definitions
-//  - Error recovery mechanisms
-//  - Common constants used by mat64 and cmat128
-//
-// Errors
-//
-// The mat64 and cmat128 matrix packages share a common set of errors
-// provided by matrix via the matrix.Error type.
-//
-// Errors are either returned directly or used as the parameter of a panic
-// depending on the class of error encountered. Returned errors indicate
-// that a call was not able to complete successfully while panics generally
-// indicate a programmer or unrecoverable error.
-//
-// Examples of each type are found in the mat64 Solve methods, which find
-// x such that A*x = b.
-//
-// An error value is returned from the function or method when the operation
-// can meaningfully fail. The Solve operation cannot complete if A is
-// singular. However, determining the singularity of A is most easily
-// discovered during the Solve procedure itself and is a valid result from
-// the operation, so in this case an error is returned.
-//
-// A function will panic when the input parameters are inappropriate for
-// the function. In Solve, for example, the number of rows of each input
-// matrix must be equal because of the rules of matrix multiplication.
-// Similarly, for solving A*x = b, a non-zero receiver must have the same
-// number of rows as A has columns and must have the same number of columns
-// as b. In all cases where a function will panic, conditions that would
-// lead to a panic can easily be checked prior to a call.
-//
-// Error Recovery
-//
-// When a matrix.Error is the parameter of a panic, the panic can be
-// recovered by a Maybe function, which will then return the error.
-// Panics that are not of type matrix.Error are re-panicked by the
-// Maybe functions.
-//
-// Invariants
-//
-// Matrix input arguments to functions are never directly modified. If an operation
-// changes Matrix data, the mutated matrix will be the receiver of a function.
-//
-// For convenience, a matrix may be used as both a receiver and as an input, e.g.
-//  a.Pow(a, 6)
-//  v.SolveVec(a.T(), v)
-// though in many cases this will cause an allocation (see Element Aliasing).
-// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
-//
-// Element Aliasing
-//
-// Most methods in the matrix packages modify receiver data. It is forbidden for the modified
-// data region of the receiver to overlap the used data area of the input
-// arguments. The exception to this rule is when the method receiver is equal to one
-// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
-//
-// This prohibition is to help avoid subtle mistakes when the method needs to read
-// from and write to the same data region. There are ways to make mistakes using the
-// matrix API, and matrix functions will detect and complain about those.
-// There are many ways to make mistakes by excursion from the matrix API via
-// interaction with raw matrix values.
-//
-// If you need to read the rest of this section to understand the behavior of
-// your program, you are being clever. Don't be clever. If you must be clever,
-// blas64/cblas128 and lapack64/clapack128 may be used to call the behavior directly.
-//
-// The matrix packages will use the following rules to detect overlap between the receiver and one
-// of the inputs:
-//  - the input implements one of the Raw methods, and
-//  - the Raw type matches that of the receiver or
-//    one is a RawMatrixer and the other is a RawVectorer, and
-//  - the address ranges of the backing data slices overlap, and
-//  - the strides differ or there is an overlap in the used data elements.
-// If such an overlap is detected, the method will panic.
-//
-// The following cases will not panic:
-//  - the data slices do not overlap,
-//  - there is pointer identity between the receiver and input values after
-//    the value has been untransposed if necessary.
-//
-// The matrix packages will not attempt to detect element overlap if the input does not implement a
-// Raw method, or if the Raw method differs from that of the receiver except when a
-// conversion has occurred through a matrix API function. Method behavior is undefined
-// if there is undetected overlap.
-//
-package matrix
--- a/vendor/github.com/gonum/matrix/gendoc.go
+++ b/vendor/github.com/gonum/matrix/gendoc.go
@ -1,343 +0,0 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//+build ignore
-
-// gendoc creates the matrix, mat64 and cmat128 package doc comments.
-package main
-
-import (
-	"fmt"
-	"log"
-	"os"
-	"path/filepath"
-	"strings"
-	"text/template"
-	"unicode/utf8"
-)
-
-var docs = template.Must(template.New("docs").Funcs(funcs).Parse(`{{define "common"}}// Generated by running
-//  go generate github.com/gonum/matrix
-// DO NOT EDIT.
-
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package {{.Name}} provides {{.Provides}}
-//
-// Overview
-//
-// This section provides a quick overview of the {{.Name}} package. The following
-// sections provide more in depth commentary.
-//
-{{.Overview}}
-//{{end}}
-{{define "interfaces"}}// The Matrix Interfaces
-//
-// The Matrix interface is the common link between the concrete types. The Matrix
-// interface is defined by three functions: Dims, which returns the dimensions
-// of the Matrix, At, which returns the element in the specified location, and
-// T for returning a Transpose (discussed later). All of the concrete types can
-// perform these behaviors and so implement the interface. Methods and functions
-// are designed to use this interface, so in particular the method
-//  func (m *Dense) Mul(a, b Matrix)
-// constructs a *Dense from the result of a multiplication with any Matrix types,
-// not just *Dense. Where more restrictive requirements must be met, there are also the
-// Symmetric and Triangular interfaces. For example, in
-//  func (s *SymDense) AddSym(a, b Symmetric)
-// the Symmetric interface guarantees a symmetric result.
-//
-// Transposes
-//
-// The T method is used for transposition. For example, c.Mul(a.T(), b) computes
-// c = a^T * b. The {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}} types implement this method using an implicit transpose —
-// see the Transpose type for more details. Note that some operations have a
-// transpose as part of their definition, as in *SymDense.SymOuterK.
-//{{end}}
-{{define "factorization"}}// Matrix Factorization
-//
-// Matrix factorizations, such as the LU decomposition, typically have their own
-// specific data storage, and so are each implemented as a specific type. The
-// factorization can be computed through a call to Factorize
-//  var lu {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}}.LU
-//  lu.Factorize(a)
-// The elements of the factorization can be extracted through methods on the
-// appropriate type, i.e. *TriDense.LFromLU and *TriDense.UFromLU. Alternatively,
-// they can be used directly, as in *Dense.SolveLU. Some factorizations can be
-// updated directly, without needing to update the original matrix and refactorize,
-// as in *LU.RankOne.
-//{{end}}
-{{define "blas"}}// BLAS and LAPACK
-//
-// BLAS and LAPACK are the standard APIs for linear algebra routines. Many
-// operations in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} are implemented using calls to the wrapper functions
-// in gonum/blas/{{.BLAS|alts}} and gonum/lapack/{{.LAPACK|alts}}. By default, {{.BLAS|join "/"}} and
-// {{.LAPACK|join "/"}} call the native Go implementations of the routines. Alternatively,
-// it is possible to use C-based implementations of the APIs through the respective
-// cgo packages and "Use" functions. The Go implementation of LAPACK makes calls
-// through {{.BLAS|join "/"}}, so if a cgo BLAS implementation is registered, the {{.LAPACK|join "/"}}
-// calls will be partially executed in Go and partially executed in C.
-//{{end}}
-{{define "switching"}}// Type Switching
-//
-// The Matrix abstraction enables efficiency as well as interoperability. Go's
-// type reflection capabilities are used to choose the most efficient routine
-// given the specific concrete types. For example, in
-//  c.Mul(a, b)
-// if a and b both implement RawMatrixer, that is, they can be represented as a
-// {{.BLAS|alts}}.General, {{.BLAS|alts}}.Gemm (general matrix multiplication) is called, while
-// instead if b is a RawSymmetricer {{.BLAS|alts}}.Symm is used (general-symmetric
-// multiplication), and if b is a *Vector {{.BLAS|alts}}.Gemv is used.
-//
-// There are many possible type combinations and special cases. No specific guarantees
-// are made about the performance of any method, and in particular, note that an
-// abstract matrix type may be copied into a concrete type of the corresponding
-// value. If there are specific special cases that are needed, please submit a
-// pull-request or file an issue.
-//{{end}}
-{{define "invariants"}}// Invariants
-//
-// Matrix input arguments to functions are never directly modified. If an operation
-// changes Matrix data, the mutated matrix will be the receiver of a function.
-//
-// For convenience, a matrix may be used as both a receiver and as an input, e.g.
-//  a.Pow(a, 6)
-//  v.SolveVec(a.T(), v)
-// though in many cases this will cause an allocation (see Element Aliasing).
-// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
-//{{end}}
-{{define "aliasing"}}// Element Aliasing
-//
-// Most methods in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} modify receiver data. It is forbidden for the modified
-// data region of the receiver to overlap the used data area of the input
-// arguments. The exception to this rule is when the method receiver is equal to one
-// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
-//
-// This prohibition is to help avoid subtle mistakes when the method needs to read
-// from and write to the same data region. There are ways to make mistakes using the
-// {{.Name}} API, and {{.Name}} functions will detect and complain about those.
-// There are many ways to make mistakes by excursion from the {{.Name}} API via
-// interaction with raw matrix values.
-//
-// If you need to read the rest of this section to understand the behavior of
-// your program, you are being clever. Don't be clever. If you must be clever,
-// {{.BLAS|join "/"}} and {{.LAPACK|join "/"}} may be used to call the behavior directly.
-//
-// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will use the following rules to detect overlap between the receiver and one
-// of the inputs:
-//  - the input implements one of the Raw methods, and
-//  - the Raw type matches that of the receiver or
-//    one is a RawMatrixer and the other is a RawVectorer, and
-//  - the address ranges of the backing data slices overlap, and
-//  - the strides differ or there is an overlap in the used data elements.
-// If such an overlap is detected, the method will panic.
-//
-// The following cases will not panic:
-//  - the data slices do not overlap,
-//  - there is pointer identity between the receiver and input values after
-//    the value has been untransposed if necessary.
-//
-// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will not attempt to detect element overlap if the input does not implement a
-// Raw method, or if the Raw method differs from that of the receiver except when a
-// conversion has occurred through a {{.Name}} API function. Method behavior is undefined
-// if there is undetected overlap.
-//{{end}}`))
-
-type Package struct {
-	path string
-
-	Name           string
-	Provides       string
-	Description    string
-	ExamplePackage string
-	Overview       string
-
-	BLAS   []string
-	LAPACK []string
-
-	template string
-}
-
-var pkgs = []Package{
-	{
-		path: ".",
-
-		Name:        "matrix",
-		Description: "the matrix packages",
-		Provides: `common error handling mechanisms for matrix operations
-// in mat64 and cmat128.`,
-		ExamplePackage: "mat64",
-
-		Overview: `// matrix provides:
-//  - Error type definitions
-//  - Error recovery mechanisms
-//  - Common constants used by mat64 and cmat128
-//
-// Errors
-//
-// The mat64 and cmat128 matrix packages share a common set of errors
-// provided by matrix via the matrix.Error type.
-//
-// Errors are either returned directly or used as the parameter of a panic
-// depending on the class of error encountered. Returned errors indicate
-// that a call was not able to complete successfully while panics generally
-// indicate a programmer or unrecoverable error.
-//
-// Examples of each type are found in the mat64 Solve methods, which find
-// x such that A*x = b.
-//
-// An error value is returned from the function or method when the operation
-// can meaningfully fail. The Solve operation cannot complete if A is
-// singular. However, determining the singularity of A is most easily
-// discovered during the Solve procedure itself and is a valid result from
-// the operation, so in this case an error is returned.
-//
-// A function will panic when the input parameters are inappropriate for
-// the function. In Solve, for example, the number of rows of each input
-// matrix must be equal because of the rules of matrix multiplication.
-// Similarly, for solving A*x = b, a non-zero receiver must have the same
-// number of rows as A has columns and must have the same number of columns
-// as b. In all cases where a function will panic, conditions that would
-// lead to a panic can easily be checked prior to a call.
-//
-// Error Recovery
-//
-// When a matrix.Error is the parameter of a panic, the panic can be
-// recovered by a Maybe function, which will then return the error.
-// Panics that are not of type matrix.Error are re-panicked by the
-// Maybe functions.`,
-		BLAS:   []string{"blas64", "cblas128"},
-		LAPACK: []string{"lapack64", "clapack128"},
-
-		template: `{{template "common" .}}
-{{template "invariants" .}}
-{{template "aliasing" .}}
-package {{.Name}}
-`,
-	},
-	{
-		path: "mat64",
-
-		Name: "mat64",
-		Provides: `implementations of float64 matrix structures and
-// linear algebra operations on them.`,
-
-		Overview: `// mat64 provides:
-//  - Interfaces for Matrix classes (Matrix, Symmetric, Triangular)
-//  - Concrete implementations (Dense, SymDense, TriDense)
-//  - Methods and functions for using matrix data (Add, Trace, SymRankOne)
-//  - Types for constructing and using matrix factorizations (QR, LU)
-//
-// A matrix may be constructed through the corresponding New function. If no
-// backing array is provided the matrix will be initialized to all zeros.
-//  // Allocate a zeroed matrix of size 3×5
-//  zero := mat64.NewDense(3, 5, nil)
-// If a backing data slice is provided, the matrix will have those elements.
-// Matrices are all stored in row-major format.
-//  // Generate a 6×6 matrix of random values.
-//  data := make([]float64, 36)
-//  for i := range data {
-//  	data[i] = rand.NormFloat64()
-//  }
-//  a := mat64.NewDense(6, 6, data)
-//
-// Operations involving matrix data are implemented as functions when the values
-// of the matrix remain unchanged
-//  tr := mat64.Trace(a)
-// and are implemented as methods when the operation modifies the receiver.
-//  zero.Copy(a)
-//
-// Receivers must be the correct size for the matrix operations, otherwise the
-// operation will panic. As a special case for convenience, a zero-sized matrix
-// will be modified to have the correct size, allocating data if necessary.
-//  var c mat64.Dense // construct a new zero-sized matrix
-//  c.Mul(a, a)       // c is automatically adjusted to be 6×6`,
-
-		BLAS:   []string{"blas64"},
-		LAPACK: []string{"lapack64"},
-
-		template: `{{template "common" .}}
-{{template "interfaces" .}}
-{{template "factorization" .}}
-{{template "blas" .}}
-{{template "switching" .}}
-{{template "invariants" .}}
-{{template "aliasing" .}}
-package {{.Name}}
-`,
-	},
-	{
-		path: "cmat128",
-
-		Name: "cmat128",
-		Provides: `implementations of complex128 matrix structures and
-// linear algebra operations on them.`,
-
-		Overview: `// cmat128 provides:
-//  - Interfaces for a complex Matrix`,
-
-		BLAS:   []string{"cblas128"},
-		LAPACK: []string{"clapack128"},
-
-		template: `{{template "common" . }}
-{{template "blas" .}}
-{{template "switching" .}}
-{{template "invariants" .}}
-{{template "aliasing" .}}
-package {{.Name}}
-`,
-	},
-}
-
-var funcs = template.FuncMap{
-	"sentence": sentence,
-	"alts":     alts,
-	"join":     join,
-}
-
-// sentence converts a string to sentence case where the string is the prefix of the sentence.
-func sentence(s string) string {
-	if len(s) == 0 {
-		return ""
-	}
-	_, size := utf8.DecodeRune([]byte(s))
-	return strings.ToUpper(s[:size]) + s[size:]
-}
-
-// alts renders a []string as a glob alternatives list.
-func alts(s []string) string {
-	switch len(s) {
-	case 0:
-		return ""
-	case 1:
-		return s[0]
-	default:
-		return fmt.Sprintf("{%s}", strings.Join(s, ","))
-	}
-}
-
-// join is strings.Join with the parameter order changed.
-func join(sep string, s []string) string {
-	return strings.Join(s, sep)
-}
-
-func main() {
-	for _, pkg := range pkgs {
-		t, err := template.Must(docs.Clone()).Parse(pkg.template)
-		if err != nil {
-			log.Fatalf("failed to parse template: %v", err)
-		}
-		file := filepath.Join(pkg.path, "doc.go")
-		f, err := os.Create(file)
-		if err != nil {
-			log.Fatalf("failed to create %q: %v", file, err)
-		}
-		err = t.Execute(f, pkg)
-		if err != nil {
-			log.Fatalf("failed to execute template: %v", err)
-		}
-		f.Close()
-	}
-}
--- a/vendor/github.com/gonum/matrix/generate.go
+++ b/vendor/github.com/gonum/matrix/generate.go
@ -1,7 +0,0 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:generate go run gendoc.go
-
-package matrix
--- a/vendor/github.com/gonum/matrix/mat64/index_bound_checks.go
+++ b/vendor/github.com/gonum/matrix/mat64/index_bound_checks.go
@ -1,145 +0,0 @@
-// Copyright ©2014 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file must be kept in sync with index_no_bound_checks.go.
-
-//+build bounds
-
-package mat64
-
-import "github.com/gonum/matrix"
-
-// At returns the element at row i, column j.
-func (m *Dense) At(i, j int) float64 {
-	return m.at(i, j)
-}
-
-func (m *Dense) at(i, j int) float64 {
-	if uint(i) >= uint(m.mat.Rows) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(m.mat.Cols) {
-		panic(matrix.ErrColAccess)
-	}
-	return m.mat.Data[i*m.mat.Stride+j]
-}
-
-// Set sets the element at row i, column j to the value v.
-func (m *Dense) Set(i, j int, v float64) {
-	m.set(i, j, v)
-}
-
-func (m *Dense) set(i, j int, v float64) {
-	if uint(i) >= uint(m.mat.Rows) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(m.mat.Cols) {
-		panic(matrix.ErrColAccess)
-	}
-	m.mat.Data[i*m.mat.Stride+j] = v
-}
-
-// At returns the element at row i.
-// It panics if i is out of bounds or if j is not zero.
-func (v *Vector) At(i, j int) float64 {
-	if j != 0 {
-		panic(matrix.ErrColAccess)
-	}
-	return v.at(i)
-}
-
-func (v *Vector) at(i int) float64 {
-	if uint(i) >= uint(v.n) {
-		panic(matrix.ErrRowAccess)
-	}
-	return v.mat.Data[i*v.mat.Inc]
-}
-
-// SetVec sets the element at row i to the value val.
-// It panics if i is out of bounds.
-func (v *Vector) SetVec(i int, val float64) {
-	v.setVec(i, val)
-}
-
-func (v *Vector) setVec(i int, val float64) {
-	if uint(i) >= uint(v.n) {
-		panic(matrix.ErrVectorAccess)
-	}
-	v.mat.Data[i*v.mat.Inc] = val
-}
-
-// At returns the element at row i and column j.
-func (t *SymDense) At(i, j int) float64 {
-	return t.at(i, j)
-}
-
-func (t *SymDense) at(i, j int) float64 {
-	if uint(i) >= uint(t.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(t.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	if i > j {
-		i, j = j, i
-	}
-	return t.mat.Data[i*t.mat.Stride+j]
-}
-
-// SetSym sets the elements at (i,j) and (j,i) to the value v.
-func (t *SymDense) SetSym(i, j int, v float64) {
-	t.set(i, j, v)
-}
-
-func (t *SymDense) set(i, j int, v float64) {
-	if uint(i) >= uint(t.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(t.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	if i > j {
-		i, j = j, i
-	}
-	t.mat.Data[i*t.mat.Stride+j] = v
-}
-
-// At returns the element at row i, column j.
-func (t *TriDense) At(i, j int) float64 {
-	return t.at(i, j)
-}
-
-func (t *TriDense) at(i, j int) float64 {
-	if uint(i) >= uint(t.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(t.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	isUpper := t.isUpper()
-	if (isUpper && i > j) || (!isUpper && i < j) {
-		return 0
-	}
-	return t.mat.Data[i*t.mat.Stride+j]
-}
-
-// SetTri sets the element of the triangular matrix at row i, column j to the value v.
-// It panics if the location is outside the appropriate half of the matrix.
-func (t *TriDense) SetTri(i, j int, v float64) {
-	t.set(i, j, v)
-}
-
-func (t *TriDense) set(i, j int, v float64) {
-	if uint(i) >= uint(t.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(t.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	isUpper := t.isUpper()
-	if (isUpper && i > j) || (!isUpper && i < j) {
-		panic(matrix.ErrTriangleSet)
-	}
-	t.mat.Data[i*t.mat.Stride+j] = v
-}
--- a/vendor/github.com/gonum/matrix/mat64/index_no_bound_checks.go
+++ b/vendor/github.com/gonum/matrix/mat64/index_no_bound_checks.go
@ -1,145 +0,0 @@
-// Copyright ©2014 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file must be kept in sync with index_bound_checks.go.
-
-//+build !bounds
-
-package mat64
-
-import "github.com/gonum/matrix"
-
-// At returns the element at row i, column j.
-func (m *Dense) At(i, j int) float64 {
-	if uint(i) >= uint(m.mat.Rows) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(m.mat.Cols) {
-		panic(matrix.ErrColAccess)
-	}
-	return m.at(i, j)
-}
-
-func (m *Dense) at(i, j int) float64 {
-	return m.mat.Data[i*m.mat.Stride+j]
-}
-
-// Set sets the element at row i, column j to the value v.
-func (m *Dense) Set(i, j int, v float64) {
-	if uint(i) >= uint(m.mat.Rows) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(m.mat.Cols) {
-		panic(matrix.ErrColAccess)
-	}
-	m.set(i, j, v)
-}
-
-func (m *Dense) set(i, j int, v float64) {
-	m.mat.Data[i*m.mat.Stride+j] = v
-}
-
-// At returns the element at row i.
-// It panics if i is out of bounds or if j is not zero.
-func (v *Vector) At(i, j int) float64 {
-	if uint(i) >= uint(v.n) {
-		panic(matrix.ErrRowAccess)
-	}
-	if j != 0 {
-		panic(matrix.ErrColAccess)
-	}
-	return v.at(i)
-}
-
-func (v *Vector) at(i int) float64 {
-	return v.mat.Data[i*v.mat.Inc]
-}
-
-// SetVec sets the element at row i to the value val.
-// It panics if i is out of bounds.
-func (v *Vector) SetVec(i int, val float64) {
-	if uint(i) >= uint(v.n) {
-		panic(matrix.ErrVectorAccess)
-	}
-	v.setVec(i, val)
-}
-
-func (v *Vector) setVec(i int, val float64) {
-	v.mat.Data[i*v.mat.Inc] = val
-}
-
-// At returns the element at row i and column j.
-func (s *SymDense) At(i, j int) float64 {
-	if uint(i) >= uint(s.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(s.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	return s.at(i, j)
-}
-
-func (s *SymDense) at(i, j int) float64 {
-	if i > j {
-		i, j = j, i
-	}
-	return s.mat.Data[i*s.mat.Stride+j]
-}
-
-// SetSym sets the elements at (i,j) and (j,i) to the value v.
-func (s *SymDense) SetSym(i, j int, v float64) {
-	if uint(i) >= uint(s.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(s.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	s.set(i, j, v)
-}
-
-func (s *SymDense) set(i, j int, v float64) {
-	if i > j {
-		i, j = j, i
-	}
-	s.mat.Data[i*s.mat.Stride+j] = v
-}
-
-// At returns the element at row i, column j.
-func (t *TriDense) At(i, j int) float64 {
-	if uint(i) >= uint(t.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(t.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	return t.at(i, j)
-}
-
-func (t *TriDense) at(i, j int) float64 {
-	isUpper := t.triKind()
-	if (isUpper && i > j) || (!isUpper && i < j) {
-		return 0
-	}
-	return t.mat.Data[i*t.mat.Stride+j]
-}
-
-// SetTri sets the element at row i, column j to the value v.
-// It panics if the location is outside the appropriate half of the matrix.
-func (t *TriDense) SetTri(i, j int, v float64) {
-	if uint(i) >= uint(t.mat.N) {
-		panic(matrix.ErrRowAccess)
-	}
-	if uint(j) >= uint(t.mat.N) {
-		panic(matrix.ErrColAccess)
-	}
-	isUpper := t.isUpper()
-	if (isUpper && i > j) || (!isUpper && i < j) {
-		panic(matrix.ErrTriangleSet)
-	}
-	t.set(i, j, v)
-}
-
-func (t *TriDense) set(i, j int, v float64) {
-	t.mat.Data[i*t.mat.Stride+j] = v
-}
--- a/vendor/github.com/gonum/matrix/mat64/inner.go
+++ b/vendor/github.com/gonum/matrix/mat64/inner.go
@ -1,103 +0,0 @@
-// Copyright ©2014 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mat64
-
-import (
-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f64"
-	"github.com/gonum/matrix"
-)
-
-// Inner computes the generalized inner product
-//   x^T A y
-// between vectors x and y with matrix A. This is only a true inner product if
-// A is symmetric positive definite, though the operation works for any matrix A.
-//
-// Inner panics if x.Len != m or y.Len != n when A is an m x n matrix.
-func Inner(x *Vector, A Matrix, y *Vector) float64 {
-	m, n := A.Dims()
-	if x.Len() != m {
-		panic(matrix.ErrShape)
-	}
-	if y.Len() != n {
-		panic(matrix.ErrShape)
-	}
-	if m == 0 || n == 0 {
-		return 0
-	}
-
-	var sum float64
-
-	switch b := A.(type) {
-	case RawSymmetricer:
-		bmat := b.RawSymmetric()
-		if bmat.Uplo != blas.Upper {
-			// Panic as a string not a mat64.Error.
-			panic(badSymTriangle)
-		}
-		for i := 0; i < x.Len(); i++ {
-			xi := x.at(i)
-			if xi != 0 {
-				if y.mat.Inc == 1 {
-					sum += xi * f64.DotUnitary(
-						bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
-						y.mat.Data[i:],
-					)
-				} else {
-					sum += xi * f64.DotInc(
-						bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
-						y.mat.Data[i*y.mat.Inc:], uintptr(n-i),
-						1, uintptr(y.mat.Inc),
-						0, 0,
-					)
-				}
-			}
-			yi := y.at(i)
-			if i != n-1 && yi != 0 {
-				if x.mat.Inc == 1 {
-					sum += yi * f64.DotUnitary(
-						bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
-						x.mat.Data[i+1:],
-					)
-				} else {
-					sum += yi * f64.DotInc(
-						bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
-						x.mat.Data[(i+1)*x.mat.Inc:], uintptr(n-i-1),
-						1, uintptr(x.mat.Inc),
-						0, 0,
-					)
-				}
-			}
-		}
-	case RawMatrixer:
-		bmat := b.RawMatrix()
-		for i := 0; i < x.Len(); i++ {
-			xi := x.at(i)
-			if xi != 0 {
-				if y.mat.Inc == 1 {
-					sum += xi * f64.DotUnitary(
-						bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
-						y.mat.Data,
-					)
-				} else {
-					sum += xi * f64.DotInc(
-						bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
-						y.mat.Data, uintptr(n),
-						1, uintptr(y.mat.Inc),
-						0, 0,
-					)
-				}
-			}
-		}
-	default:
-		for i := 0; i < x.Len(); i++ {
-			xi := x.at(i)
-			for j := 0; j < y.Len(); j++ {
-				sum += xi * A.At(i, j) * y.at(j)
-			}
-		}
-	}
-	return sum
-}
--- a/vendor/github.com/gonum/matrix/mat64/lq.go
+++ b/vendor/github.com/gonum/matrix/mat64/lq.go
@ -1,217 +0,0 @@
-// Copyright ©2013 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mat64
-
-import (
-	"math"
-
-	"github.com/gonum/blas"
-	"github.com/gonum/blas/blas64"
-	"github.com/gonum/lapack/lapack64"
-	"github.com/gonum/matrix"
-)
-
-// LQ is a type for creating and using the LQ factorization of a matrix.
-type LQ struct {
-	lq   *Dense
-	tau  []float64
-	cond float64
-}
-
-func (lq *LQ) updateCond() {
-	// A = LQ, where Q is orthonormal. Orthonormal multiplications do not change
-	// the condition number. Thus, ||A|| = ||L|| ||Q|| = ||Q||.
-	m := lq.lq.mat.Rows
-	work := make([]float64, 3*m)
-	iwork := make([]int, m)
-	l := lq.lq.asTriDense(m, blas.NonUnit, blas.Lower)
-	v := lapack64.Trcon(matrix.CondNorm, l.mat, work, iwork)
-	lq.cond = 1 / v
-}
-
-// Factorize computes the LQ factorization of an m×n matrix a where n <= m. The LQ
-// factorization always exists even if A is singular.
-//
-// The LQ decomposition is a factorization of the matrix A such that A = L * Q.
-// The matrix Q is an orthonormal n×n matrix, and L is an m×n upper triangular matrix.
-// L and Q can be extracted from the LFromLQ and QFromLQ methods on Dense.
-func (lq *LQ) Factorize(a Matrix) {
-	m, n := a.Dims()
-	if m > n {
-		panic(matrix.ErrShape)
-	}
-	k := min(m, n)
-	if lq.lq == nil {
-		lq.lq = &Dense{}
-	}
-	lq.lq.Clone(a)
-	work := make([]float64, 1)
-	lq.tau = make([]float64, k)
-	lapack64.Gelqf(lq.lq.mat, lq.tau, work, -1)
-	work = make([]float64, int(work[0]))
-	lapack64.Gelqf(lq.lq.mat, lq.tau, work, len(work))
-	lq.updateCond()
-}
-
-// TODO(btracey): Add in the "Reduced" forms for extracting the m×m orthogonal
-// and upper triangular matrices.
-
-// LFromLQ extracts the m×n lower trapezoidal matrix from a LQ decomposition.
-func (m *Dense) LFromLQ(lq *LQ) {
-	r, c := lq.lq.Dims()
-	m.reuseAs(r, c)
-
-	// Disguise the LQ as a lower triangular
-	t := &TriDense{
-		mat: blas64.Triangular{
-			N:      r,
-			Stride: lq.lq.mat.Stride,
-			Data:   lq.lq.mat.Data,
-			Uplo:   blas.Lower,
-			Diag:   blas.NonUnit,
-		},
-		cap: lq.lq.capCols,
-	}
-	m.Copy(t)
-
-	if r == c {
-		return
-	}
-	// Zero right of the triangular.
-	for i := 0; i < r; i++ {
-		zero(m.mat.Data[i*m.mat.Stride+r : i*m.mat.Stride+c])
-	}
-}
-
-// QFromLQ extracts the n×n orthonormal matrix Q from an LQ decomposition.
-func (m *Dense) QFromLQ(lq *LQ) {
-	r, c := lq.lq.Dims()
-	m.reuseAs(c, c)
-
-	// Set Q = I.
-	for i := 0; i < c; i++ {
-		v := m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]
-		zero(v)
-		v[i] = 1
-	}
-
-	// Construct Q from the elementary reflectors.
-	h := blas64.General{
-		Rows:   c,
-		Cols:   c,
-		Stride: c,
-		Data:   make([]float64, c*c),
-	}
-	qCopy := getWorkspace(c, c, false)
-	v := blas64.Vector{
-		Inc:  1,
-		Data: make([]float64, c),
-	}
-	for i := 0; i < r; i++ {
-		// Set h = I.
-		zero(h.Data)
-		for j := 0; j < len(h.Data); j += c + 1 {
-			h.Data[j] = 1
-		}
-
-		// Set the vector data as the elementary reflector.
-		for j := 0; j < i; j++ {
-			v.Data[j] = 0
-		}
-		v.Data[i] = 1
-		for j := i + 1; j < c; j++ {
-			v.Data[j] = lq.lq.mat.Data[i*lq.lq.mat.Stride+j]
-		}
-
-		// Compute the multiplication matrix.
-		blas64.Ger(-lq.tau[i], v, v, h)
-		qCopy.Copy(m)
-		blas64.Gemm(blas.NoTrans, blas.NoTrans,
-			1, h, qCopy.mat,
-			0, m.mat)
-	}
-}
-
-// SolveLQ finds a minimum-norm solution to a system of linear equations defined
-// by the matrices A and b, where A is an m×n matrix represented in its LQ factorized
-// form. If A is singular or near-singular a Condition error is returned. Please
-// see the documentation for Condition for more information.
-//
-// The minimization problem solved depends on the input parameters.
-//  If trans == false, find the minimum norm solution of A * X = b.
-//  If trans == true, find X such that ||A*X - b||_2 is minimized.
-// The solution matrix, X, is stored in place into the receiver.
-func (m *Dense) SolveLQ(lq *LQ, trans bool, b Matrix) error {
-	r, c := lq.lq.Dims()
-	br, bc := b.Dims()
-
-	// The LQ solve algorithm stores the result in-place into the right hand side.
-	// The storage for the answer must be large enough to hold both b and x.
-	// However, this method's receiver must be the size of x. Copy b, and then
-	// copy the result into m at the end.
-	if trans {
-		if c != br {
-			panic(matrix.ErrShape)
-		}
-		m.reuseAs(r, bc)
-	} else {
-		if r != br {
-			panic(matrix.ErrShape)
-		}
-		m.reuseAs(c, bc)
-	}
-	// Do not need to worry about overlap between m and b because x has its own
-	// independent storage.
-	x := getWorkspace(max(r, c), bc, false)
-	x.Copy(b)
-	t := lq.lq.asTriDense(lq.lq.mat.Rows, blas.NonUnit, blas.Lower).mat
-	if trans {
-		work := make([]float64, 1)
-		lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, -1)
-		work = make([]float64, int(work[0]))
-		lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, len(work))
-
-		ok := lapack64.Trtrs(blas.Trans, t, x.mat)
-		if !ok {
-			return matrix.Condition(math.Inf(1))
-		}
-	} else {
-		ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
-		if !ok {
-			return matrix.Condition(math.Inf(1))
-		}
-		for i := r; i < c; i++ {
-			zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
-		}
-		work := make([]float64, 1)
-		lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, -1)
-		work = make([]float64, int(work[0]))
-		lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, len(work))
-	}
-	// M was set above to be the correct size for the result.
-	m.Copy(x)
-	putWorkspace(x)
-	if lq.cond > matrix.ConditionTolerance {
-		return matrix.Condition(lq.cond)
-	}
-	return nil
-}
-
-// SolveLQVec finds a minimum-norm solution to a system of linear equations.
-// Please see Dense.SolveLQ for the full documentation.
-func (v *Vector) SolveLQVec(lq *LQ, trans bool, b *Vector) error {
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-	r, c := lq.lq.Dims()
-	// The Solve implementation is non-trivial, so rather than duplicate the code,
-	// instead recast the Vectors as Dense and call the matrix code.
-	if trans {
-		v.reuseAs(r)
-	} else {
-		v.reuseAs(c)
-	}
-	return v.asDense().SolveLQ(lq, trans, b.asDense())
-}
--- a/vendor/github.com/gonum/matrix/mat64/qr.go
+++ b/vendor/github.com/gonum/matrix/mat64/qr.go
@ -1,183 +0,0 @@
-// Copyright ©2013 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mat64
-
-import (
-	"math"
-
-	"github.com/gonum/blas"
-	"github.com/gonum/blas/blas64"
-	"github.com/gonum/lapack/lapack64"
-	"github.com/gonum/matrix"
-)
-
-// QR is a type for creating and using the QR factorization of a matrix.
-type QR struct {
-	qr   *Dense
-	tau  []float64
-	cond float64
-}
-
-func (qr *QR) updateCond() {
-	// A = QR, where Q is orthonormal. Orthonormal multiplications do not change
-	// the condition number. Thus, ||A|| = ||Q|| ||R|| = ||R||.
-	n := qr.qr.mat.Cols
-	work := make([]float64, 3*n)
-	iwork := make([]int, n)
-	r := qr.qr.asTriDense(n, blas.NonUnit, blas.Upper)
-	v := lapack64.Trcon(matrix.CondNorm, r.mat, work, iwork)
-	qr.cond = 1 / v
-}
-
-// Factorize computes the QR factorization of an m×n matrix a where m >= n. The QR
-// factorization always exists even if A is singular.
-//
-// The QR decomposition is a factorization of the matrix A such that A = Q * R.
-// The matrix Q is an orthonormal m×m matrix, and R is an m×n upper triangular matrix.
-// Q and R can be extracted from the QFromQR and RFromQR methods on Dense.
-func (qr *QR) Factorize(a Matrix) {
-	m, n := a.Dims()
-	if m < n {
-		panic(matrix.ErrShape)
-	}
-	k := min(m, n)
-	if qr.qr == nil {
-		qr.qr = &Dense{}
-	}
-	qr.qr.Clone(a)
-	work := make([]float64, 1)
-	qr.tau = make([]float64, k)
-	lapack64.Geqrf(qr.qr.mat, qr.tau, work, -1)
-
-	work = make([]float64, int(work[0]))
-	lapack64.Geqrf(qr.qr.mat, qr.tau, work, len(work))
-	qr.updateCond()
-}
-
-// TODO(btracey): Add in the "Reduced" forms for extracting the n×n orthogonal
-// and upper triangular matrices.
-
-// RFromQR extracts the m×n upper trapezoidal matrix from a QR decomposition.
-func (m *Dense) RFromQR(qr *QR) {
-	r, c := qr.qr.Dims()
-	m.reuseAs(r, c)
-
-	// Disguise the QR as an upper triangular
-	t := &TriDense{
-		mat: blas64.Triangular{
-			N:      c,
-			Stride: qr.qr.mat.Stride,
-			Data:   qr.qr.mat.Data,
-			Uplo:   blas.Upper,
-			Diag:   blas.NonUnit,
-		},
-		cap: qr.qr.capCols,
-	}
-	m.Copy(t)
-
-	// Zero below the triangular.
-	for i := r; i < c; i++ {
-		zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c])
-	}
-}
-
-// QFromQR extracts the m×m orthonormal matrix Q from a QR decomposition.
-func (m *Dense) QFromQR(qr *QR) {
-	r, _ := qr.qr.Dims()
-	m.reuseAsZeroed(r, r)
-
-	// Set Q = I.
-	for i := 0; i < r*r; i += r + 1 {
-		m.mat.Data[i] = 1
-	}
-
-	// Construct Q from the elementary reflectors.
-	work := make([]float64, 1)
-	lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, -1)
-	work = make([]float64, int(work[0]))
-	lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, len(work))
-}
-
-// SolveQR finds a minimum-norm solution to a system of linear equations defined
-// by the matrices A and b, where A is an m×n matrix represented in its QR factorized
-// form. If A is singular or near-singular a Condition error is returned. Please
-// see the documentation for Condition for more information.
-//
-// The minimization problem solved depends on the input parameters.
-//  If trans == false, find X such that ||A*X - b||_2 is minimized.
-//  If trans == true, find the minimum norm solution of A^T * X = b.
-// The solution matrix, X, is stored in place into the receiver.
-func (m *Dense) SolveQR(qr *QR, trans bool, b Matrix) error {
-	r, c := qr.qr.Dims()
-	br, bc := b.Dims()
-
-	// The QR solve algorithm stores the result in-place into the right hand side.
-	// The storage for the answer must be large enough to hold both b and x.
-	// However, this method's receiver must be the size of x. Copy b, and then
-	// copy the result into m at the end.
-	if trans {
-		if c != br {
-			panic(matrix.ErrShape)
-		}
-		m.reuseAs(r, bc)
-	} else {
-		if r != br {
-			panic(matrix.ErrShape)
-		}
-		m.reuseAs(c, bc)
-	}
-	// Do not need to worry about overlap between m and b because x has its own
-	// independent storage.
-	x := getWorkspace(max(r, c), bc, false)
-	x.Copy(b)
-	t := qr.qr.asTriDense(qr.qr.mat.Cols, blas.NonUnit, blas.Upper).mat
-	if trans {
-		ok := lapack64.Trtrs(blas.Trans, t, x.mat)
-		if !ok {
-			return matrix.Condition(math.Inf(1))
-		}
-		for i := c; i < r; i++ {
-			zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
-		}
-		work := make([]float64, 1)
-		lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, -1)
-		work = make([]float64, int(work[0]))
-		lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, len(work))
-	} else {
-		work := make([]float64, 1)
-		lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, -1)
-		work = make([]float64, int(work[0]))
-		lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, len(work))
-
-		ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
-		if !ok {
-			return matrix.Condition(math.Inf(1))
-		}
-	}
-	// M was set above to be the correct size for the result.
-	m.Copy(x)
-	putWorkspace(x)
-	if qr.cond > matrix.ConditionTolerance {
-		return matrix.Condition(qr.cond)
-	}
-	return nil
-}
-
-// SolveQRVec finds a minimum-norm solution to a system of linear equations.
-// Please see Dense.SolveQR for the full documentation.
-func (v *Vector) SolveQRVec(qr *QR, trans bool, b *Vector) error {
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-	r, c := qr.qr.Dims()
-	// The Solve implementation is non-trivial, so rather than duplicate the code,
-	// instead recast the Vectors as Dense and call the matrix code.
-	if trans {
-		v.reuseAs(r)
-	} else {
-		v.reuseAs(c)
-	}
-	return v.asDense().SolveQR(qr, trans, b.asDense())
-}
--- a/vendor/github.com/gonum/matrix/mat64/shadow.go
+++ b/vendor/github.com/gonum/matrix/mat64/shadow.go
@ -1,281 +0,0 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mat64
-
-import (
-	"github.com/gonum/blas"
-	"github.com/gonum/blas/blas64"
-)
-
-const (
-	// regionOverlap is the panic string used for the general case
-	// of a matrix region overlap between a source and destination.
-	regionOverlap = "mat64: bad region: overlap"
-
-	// regionIdentity is the panic string used for the specific
-	// case of complete agreement between a source and a destination.
-	regionIdentity = "mat64: bad region: identical"
-
-	// mismatchedStrides is the panic string used for overlapping
-	// data slices with differing strides.
-	mismatchedStrides = "mat64: bad region: different strides"
-)
-
-// checkOverlap returns false if the receiver does not overlap data elements
-// referenced by the parameter and panics otherwise.
-//
-// checkOverlap methods return a boolean to allow the check call to be added to a
-// boolean expression, making use of short-circuit operators.
-
-func (m *Dense) checkOverlap(a blas64.General) bool {
-	mat := m.RawMatrix()
-	if cap(mat.Data) == 0 || cap(a.Data) == 0 {
-		return false
-	}
-
-	off := offset(mat.Data[:1], a.Data[:1])
-
-	if off == 0 {
-		// At least one element overlaps.
-		if mat.Cols == a.Cols && mat.Rows == a.Rows && mat.Stride == a.Stride {
-			panic(regionIdentity)
-		}
-		panic(regionOverlap)
-	}
-
-	if off > 0 && len(mat.Data) <= off {
-		// We know m is completely before a.
-		return false
-	}
-	if off < 0 && len(a.Data) <= -off {
-		// We know m is completely after a.
-		return false
-	}
-
-	if mat.Stride != a.Stride {
-		// Too hard, so assume the worst.
-		panic(mismatchedStrides)
-	}
-
-	if off < 0 {
-		off = -off
-		mat.Cols, a.Cols = a.Cols, mat.Cols
-	}
-	if rectanglesOverlap(off, mat.Cols, a.Cols, mat.Stride) {
-		panic(regionOverlap)
-	}
-	return false
-}
-
-func (s *SymDense) checkOverlap(a blas64.Symmetric) bool {
-	mat := s.RawSymmetric()
-	if cap(mat.Data) == 0 || cap(a.Data) == 0 {
-		return false
-	}
-
-	off := offset(mat.Data[:1], a.Data[:1])
-
-	if off == 0 {
-		// At least one element overlaps.
-		if mat.N == a.N && mat.Stride == a.Stride {
-			panic(regionIdentity)
-		}
-		panic(regionOverlap)
-	}
-
-	if off > 0 && len(mat.Data) <= off {
-		// We know s is completely before a.
-		return false
-	}
-	if off < 0 && len(a.Data) <= -off {
-		// We know s is completely after a.
-		return false
-	}
-
-	if mat.Stride != a.Stride {
-		// Too hard, so assume the worst.
-		panic(mismatchedStrides)
-	}
-
-	if off < 0 {
-		off = -off
-		mat.N, a.N = a.N, mat.N
-		// If we created the matrix it will always
-		// be in the upper triangle, but don't trust
-		// that this is the case.
-		mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
-	}
-	if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
-		panic(regionOverlap)
-	}
-	return false
-}
-
-func (t *TriDense) checkOverlap(a blas64.Triangular) bool {
-	mat := t.RawTriangular()
-	if cap(mat.Data) == 0 || cap(a.Data) == 0 {
-		return false
-	}
-
-	off := offset(mat.Data[:1], a.Data[:1])
-
-	if off == 0 {
-		// At least one element overlaps.
-		if mat.N == a.N && mat.Stride == a.Stride {
-			panic(regionIdentity)
-		}
-		panic(regionOverlap)
-	}
-
-	if off > 0 && len(mat.Data) <= off {
-		// We know t is completely before a.
-		return false
-	}
-	if off < 0 && len(a.Data) <= -off {
-		// We know t is completely after a.
-		return false
-	}
-
-	if mat.Stride != a.Stride {
-		// Too hard, so assume the worst.
-		panic(mismatchedStrides)
-	}
-
-	if off < 0 {
-		off = -off
-		mat.N, a.N = a.N, mat.N
-		mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
-	}
-	if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
-		panic(regionOverlap)
-	}
-	return false
-}
-
-func (v *Vector) checkOverlap(a blas64.Vector) bool {
-	mat := v.mat
-	if cap(mat.Data) == 0 || cap(a.Data) == 0 {
-		return false
-	}
-
-	off := offset(mat.Data[:1], a.Data[:1])
-
-	if off == 0 {
-		// At least one element overlaps.
-		if mat.Inc == a.Inc && len(mat.Data) == len(a.Data) {
-			panic(regionIdentity)
-		}
-		panic(regionOverlap)
-	}
-
-	if off > 0 && len(mat.Data) <= off {
-		// We know v is completely before a.
-		return false
-	}
-	if off < 0 && len(a.Data) <= -off {
-		// We know v is completely after a.
-		return false
-	}
-
-	if mat.Inc != a.Inc {
-		// Too hard, so assume the worst.
-		panic(mismatchedStrides)
-	}
-
-	if mat.Inc == 1 || off&mat.Inc == 0 {
-		panic(regionOverlap)
-	}
-	return false
-}
-
-// rectanglesOverlap returns whether the strided rectangles a and b overlap
-// when b is offset by off elements after a but has at least one element before
-// the end of a. off must be positive. a and b have aCols and bCols respectively.
-//
-// rectanglesOverlap works by shifting both matrices left such that the left
-// column of a is at 0. The column indexes are flattened by obtaining the shifted
-// relative left and right column positions modulo the common stride. This allows
-// direct comparison of the column offsets when the matrix backing data slices
-// are known to overlap.
-func rectanglesOverlap(off, aCols, bCols, stride int) bool {
-	if stride == 1 {
-		// Unit stride means overlapping data
-		// slices must overlap as matrices.
-		return true
-	}
-
-	// Flatten the shifted matrix column positions
-	// so a starts at 0, modulo the common stride.
-	const aFrom = 0
-	aTo := aCols
-	// The mod stride operations here make the from
-	// and to indexes comparable between a and b when
-	// the data slices of a and b overlap.
-	bFrom := off % stride
-	bTo := (bFrom + bCols) % stride
-
-	if bTo == 0 || bFrom < bTo {
-		// b matrix is not wrapped: compare for
-		// simple overlap.
-		return bFrom < aTo
-	}
-
-	// b strictly wraps and so must overlap with a.
-	return true
-}
-
-// trianglesOverlap returns whether the strided triangles a and b overlap
-// when b is offset by off elements after a but has at least one element before
-// the end of a. off must be positive. a and b are aSize×aSize and bSize×bSize
-// respectively.
-func trianglesOverlap(off, aSize, bSize, stride int, aUpper, bUpper bool) bool {
-	if !rectanglesOverlap(off, aSize, bSize, stride) {
-		// Fast return if bounding rectangles do not overlap.
-		return false
-	}
-
-	// Find location of b relative to a.
-	rowOffset := off / stride
-	colOffset := off % stride
-	if (off+bSize)%stride < colOffset {
-		// We have wrapped, so readjust offsets.
-		rowOffset++
-		colOffset -= stride
-	}
-
-	if aUpper {
-		// Check whether the upper left of b
-		// is in the triangle of a
-		if rowOffset >= 0 && rowOffset <= colOffset {
-			return true
-		}
-		// Check whether the upper right of b
-		// is in the triangle of a.
-		return bUpper && rowOffset < colOffset+bSize
-	}
-
-	// Check whether the upper left of b
-	// is in the triangle of a
-	if colOffset >= 0 && rowOffset >= colOffset {
-		return true
-	}
-	if bUpper {
-		// Check whether the upper right corner of b
-		// is in a or the upper row of b spans a row
-		// of a.
-		return rowOffset > colOffset+bSize || colOffset < 0
-	}
-	if colOffset < 0 {
-		// Check whether the lower left of a
-		// is in the triangle of b or below
-		// the diagonal of a. This requires a
-		// swap of reference origin.
-		return -rowOffset+aSize > -colOffset
-	}
-	// Check whether the lower left of b
-	// is in the triangle of a or below
-	// the diagonal of a.
-	return rowOffset+bSize > colOffset
-}
--- a/vendor/github.com/gonum/matrix/mat64/vector.go
+++ b/vendor/github.com/gonum/matrix/mat64/vector.go
@ -1,468 +0,0 @@
-// Copyright ©2013 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package mat64
-
-import (
-	"github.com/gonum/blas"
-	"github.com/gonum/blas/blas64"
-	"github.com/gonum/internal/asm/f64"
-	"github.com/gonum/matrix"
-)
-
-var (
-	vector *Vector
-
-	_ Matrix = vector
-
-	_ Reseter = vector
-)
-
-// Vector represents a column vector.
-type Vector struct {
-	mat blas64.Vector
-	n   int
-	// A BLAS vector can have a negative increment, but allowing this
-	// in the mat64 type complicates a lot of code, and doesn't gain anything.
-	// Vector must have positive increment in this package.
-}
-
-// NewVector creates a new Vector of length n. If data == nil,
-// a new slice is allocated for the backing slice. If len(data) == n, data is
-// used as the backing slice, and changes to the elements of the returned Vector
-// will be reflected in data. If neither of these is true, NewVector will panic.
-func NewVector(n int, data []float64) *Vector {
-	if len(data) != n && data != nil {
-		panic(matrix.ErrShape)
-	}
-	if data == nil {
-		data = make([]float64, n)
-	}
-	return &Vector{
-		mat: blas64.Vector{
-			Inc:  1,
-			Data: data,
-		},
-		n: n,
-	}
-}
-
-// ViewVec returns a sub-vector view of the receiver starting at element i and
-// extending n rows. If i is out of range, n is zero, or the view extends
-// beyond the bounds of the Vector, ViewVec will panic with ErrIndexOutOfRange.
-// The returned Vector retains reference to the underlying vector.
-//
-// ViewVec is deprecated and should not be used. It will be removed at a later date.
-func (v *Vector) ViewVec(i, n int) *Vector {
-	return v.SliceVec(i, i+n)
-}
-
-// SliceVec returns a new Vector that shares backing data with the receiver.
-// The returned matrix starts at i of the recevier and extends k-i elements.
-// SliceVec panics with ErrIndexOutOfRange if the slice is outside the bounds
-// of the receiver.
-func (v *Vector) SliceVec(i, k int) *Vector {
-	if i < 0 || k <= i || v.n < k {
-		panic(matrix.ErrIndexOutOfRange)
-	}
-	return &Vector{
-		n: k - i,
-		mat: blas64.Vector{
-			Inc:  v.mat.Inc,
-			Data: v.mat.Data[i*v.mat.Inc : (k-1)*v.mat.Inc+1],
-		},
-	}
-}
-
-func (v *Vector) Dims() (r, c int) {
-	if v.isZero() {
-		return 0, 0
-	}
-	return v.n, 1
-}
-
-// Len returns the length of the vector.
-func (v *Vector) Len() int {
-	return v.n
-}
-
-// T performs an implicit transpose by returning the receiver inside a Transpose.
-func (v *Vector) T() Matrix {
-	return Transpose{v}
-}
-
-// Reset zeros the length of the vector so that it can be reused as the
-// receiver of a dimensionally restricted operation.
-//
-// See the Reseter interface for more information.
-func (v *Vector) Reset() {
-	// No change of Inc or n to 0 may be
-	// made unless both are set to 0.
-	v.mat.Inc = 0
-	v.n = 0
-	v.mat.Data = v.mat.Data[:0]
-}
-
-// CloneVec makes a copy of a into the receiver, overwriting the previous value
-// of the receiver.
-func (v *Vector) CloneVec(a *Vector) {
-	if v == a {
-		return
-	}
-	v.n = a.n
-	v.mat = blas64.Vector{
-		Inc:  1,
-		Data: use(v.mat.Data, v.n),
-	}
-	blas64.Copy(v.n, a.mat, v.mat)
-}
-
-func (v *Vector) RawVector() blas64.Vector {
-	return v.mat
-}
-
-// CopyVec makes a copy of elements of a into the receiver. It is similar to the
-// built-in copy; it copies as much as the overlap between the two vectors and
-// returns the number of elements it copied.
-func (v *Vector) CopyVec(a *Vector) int {
-	n := min(v.Len(), a.Len())
-	if v != a {
-		blas64.Copy(n, a.mat, v.mat)
-	}
-	return n
-}
-
-// ScaleVec scales the vector a by alpha, placing the result in the receiver.
-func (v *Vector) ScaleVec(alpha float64, a *Vector) {
-	n := a.Len()
-	if v != a {
-		v.reuseAs(n)
-		if v.mat.Inc == 1 && a.mat.Inc == 1 {
-			f64.ScalUnitaryTo(v.mat.Data, alpha, a.mat.Data)
-			return
-		}
-		f64.ScalIncTo(v.mat.Data, uintptr(v.mat.Inc),
-			alpha, a.mat.Data, uintptr(n), uintptr(a.mat.Inc))
-		return
-	}
-	if v.mat.Inc == 1 {
-		f64.ScalUnitary(alpha, v.mat.Data)
-		return
-	}
-	f64.ScalInc(alpha, v.mat.Data, uintptr(n), uintptr(v.mat.Inc))
-}
-
-// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver.
-func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) {
-	if alpha == 1 {
-		v.AddVec(a, b)
-		return
-	}
-	if alpha == -1 {
-		v.SubVec(a, b)
-		return
-	}
-
-	ar := a.Len()
-	br := b.Len()
-
-	if ar != br {
-		panic(matrix.ErrShape)
-	}
-
-	if v != a {
-		v.checkOverlap(a.mat)
-	}
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-
-	v.reuseAs(ar)
-
-	switch {
-	case alpha == 0: // v <- a
-		v.CopyVec(a)
-	case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v
-		blas64.Scal(ar, alpha+1, v.mat)
-	case v == a && v != b: // v <- v + alpha * b
-		if v.mat.Inc == 1 && b.mat.Inc == 1 {
-			// Fast path for a common case.
-			f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
-		} else {
-			f64.AxpyInc(alpha, b.mat.Data, v.mat.Data,
-				uintptr(ar), uintptr(b.mat.Inc), uintptr(v.mat.Inc), 0, 0)
-		}
-	default: // v <- a + alpha * b or v <- a + alpha * v
-		if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
-			// Fast path for a common case.
-			f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
-		} else {
-			f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
-				alpha, b.mat.Data, a.mat.Data,
-				uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
-		}
-	}
-}
-
-// AddVec adds the vectors a and b, placing the result in the receiver.
-func (v *Vector) AddVec(a, b *Vector) {
-	ar := a.Len()
-	br := b.Len()
-
-	if ar != br {
-		panic(matrix.ErrShape)
-	}
-
-	if v != a {
-		v.checkOverlap(a.mat)
-	}
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-
-	v.reuseAs(ar)
-
-	if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
-		// Fast path for a common case.
-		f64.AxpyUnitaryTo(v.mat.Data, 1, b.mat.Data, a.mat.Data)
-		return
-	}
-	f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
-		1, b.mat.Data, a.mat.Data,
-		uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
-}
-
-// SubVec subtracts the vector b from a, placing the result in the receiver.
-func (v *Vector) SubVec(a, b *Vector) {
-	ar := a.Len()
-	br := b.Len()
-
-	if ar != br {
-		panic(matrix.ErrShape)
-	}
-
-	if v != a {
-		v.checkOverlap(a.mat)
-	}
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-
-	v.reuseAs(ar)
-
-	if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
-		// Fast path for a common case.
-		f64.AxpyUnitaryTo(v.mat.Data, -1, b.mat.Data, a.mat.Data)
-		return
-	}
-	f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
-		-1, b.mat.Data, a.mat.Data,
-		uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
-}
-
-// MulElemVec performs element-wise multiplication of a and b, placing the result
-// in the receiver.
-func (v *Vector) MulElemVec(a, b *Vector) {
-	ar := a.Len()
-	br := b.Len()
-
-	if ar != br {
-		panic(matrix.ErrShape)
-	}
-
-	if v != a {
-		v.checkOverlap(a.mat)
-	}
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-
-	v.reuseAs(ar)
-
-	amat, bmat := a.RawVector(), b.RawVector()
-	for i := 0; i < v.n; i++ {
-		v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] * bmat.Data[i*bmat.Inc]
-	}
-}
-
-// DivElemVec performs element-wise division of a by b, placing the result
-// in the receiver.
-func (v *Vector) DivElemVec(a, b *Vector) {
-	ar := a.Len()
-	br := b.Len()
-
-	if ar != br {
-		panic(matrix.ErrShape)
-	}
-
-	if v != a {
-		v.checkOverlap(a.mat)
-	}
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-
-	v.reuseAs(ar)
-
-	amat, bmat := a.RawVector(), b.RawVector()
-	for i := 0; i < v.n; i++ {
-		v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] / bmat.Data[i*bmat.Inc]
-	}
-}
-
-// MulVec computes a * b. The result is stored into the receiver.
-// MulVec panics if the number of columns in a does not equal the number of rows in b.
-func (v *Vector) MulVec(a Matrix, b *Vector) {
-	r, c := a.Dims()
-	br := b.Len()
-	if c != br {
-		panic(matrix.ErrShape)
-	}
-
-	if v != b {
-		v.checkOverlap(b.mat)
-	}
-
-	a, trans := untranspose(a)
-	ar, ac := a.Dims()
-	v.reuseAs(r)
-	var restore func()
-	if v == a {
-		v, restore = v.isolatedWorkspace(a.(*Vector))
-		defer restore()
-	} else if v == b {
-		v, restore = v.isolatedWorkspace(b)
-		defer restore()
-	}
-
-	switch a := a.(type) {
-	case *Vector:
-		if v != a {
-			v.checkOverlap(a.mat)
-		}
-
-		if a.Len() == 1 {
-			// {1,1} x {1,n}
-			av := a.At(0, 0)
-			for i := 0; i < b.Len(); i++ {
-				v.mat.Data[i*v.mat.Inc] = av * b.mat.Data[i*b.mat.Inc]
-			}
-			return
-		}
-		if b.Len() == 1 {
-			// {1,n} x {1,1}
-			bv := b.At(0, 0)
-			for i := 0; i < a.Len(); i++ {
-				v.mat.Data[i*v.mat.Inc] = bv * a.mat.Data[i*a.mat.Inc]
-			}
-			return
-		}
-		// {n,1} x {1,n}
-		var sum float64
-		for i := 0; i < c; i++ {
-			sum += a.At(i, 0) * b.At(i, 0)
-		}
-		v.SetVec(0, sum)
-		return
-	case RawSymmetricer:
-		amat := a.RawSymmetric()
-		blas64.Symv(1, amat, b.mat, 0, v.mat)
-	case RawTriangular:
-		v.CopyVec(b)
-		amat := a.RawTriangular()
-		ta := blas.NoTrans
-		if trans {
-			ta = blas.Trans
-		}
-		blas64.Trmv(ta, amat, v.mat)
-	case RawMatrixer:
-		amat := a.RawMatrix()
-		// We don't know that a is a *Dense, so make
-		// a temporary Dense to check overlap.
-		(&Dense{mat: amat}).checkOverlap(v.asGeneral())
-		t := blas.NoTrans
-		if trans {
-			t = blas.Trans
-		}
-		blas64.Gemv(t, 1, amat, b.mat, 0, v.mat)
-	default:
-		if trans {
-			col := make([]float64, ar)
-			for c := 0; c < ac; c++ {
-				for i := range col {
-					col[i] = a.At(i, c)
-				}
-				var f float64
-				for i, e := range col {
-					f += e * b.mat.Data[i*b.mat.Inc]
-				}
-				v.mat.Data[c*v.mat.Inc] = f
-			}
-		} else {
-			row := make([]float64, ac)
-			for r := 0; r < ar; r++ {
-				for i := range row {
-					row[i] = a.At(r, i)
-				}
-				var f float64
-				for i, e := range row {
-					f += e * b.mat.Data[i*b.mat.Inc]
-				}
-				v.mat.Data[r*v.mat.Inc] = f
-			}
-		}
-	}
-}
-
-// reuseAs resizes an empty vector to a r×1 vector,
-// or checks that a non-empty matrix is r×1.
-func (v *Vector) reuseAs(r int) {
-	if v.isZero() {
-		v.mat = blas64.Vector{
-			Inc:  1,
-			Data: use(v.mat.Data, r),
-		}
-		v.n = r
-		return
-	}
-	if r != v.n {
-		panic(matrix.ErrShape)
-	}
-}
-
-func (v *Vector) isZero() bool {
-	// It must be the case that v.Dims() returns
-	// zeros in this case. See comment in Reset().
-	return v.mat.Inc == 0
-}
-
-func (v *Vector) isolatedWorkspace(a *Vector) (n *Vector, restore func()) {
-	l := a.Len()
-	n = getWorkspaceVec(l, false)
-	return n, func() {
-		v.CopyVec(n)
-		putWorkspaceVec(n)
-	}
-}
-
-// asDense returns a Dense representation of the receiver with the same
-// underlying data.
-func (v *Vector) asDense() *Dense {
-	return &Dense{
-		mat:     v.asGeneral(),
-		capRows: v.n,
-		capCols: 1,
-	}
-}
-
-// asGeneral returns a blas64.General representation of the receiver with the
-// same underlying data.
-func (v *Vector) asGeneral() blas64.General {
-	return blas64.General{
-		Rows:   v.n,
-		Cols:   1,
-		Stride: v.mat.Inc,
-		Data:   v.mat.Data,
-	}
-}
--- a/vendor/github.com/gonum/plot/plotter/bubbles.go
+++ b/vendor/github.com/gonum/plot/plotter/bubbles.go
@ -1,118 +0,0 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package plotter
-
-import (
-	"errors"
-	"image/color"
-	"math"
-
-	"github.com/gonum/plot"
-	"github.com/gonum/plot/vg"
-	"github.com/gonum/plot/vg/draw"
-)
-
-// Bubbles implements the Plotter interface, drawing
-// a bubble plot of x, y, z triples where the z value
-// determines the radius of the bubble.
-type Bubbles struct {
-	XYZs
-
-	// Color is the color of the bubbles.
-	color.Color
-
-	// MinRadius and MaxRadius give the minimum
-	// and maximum bubble radius respectively.
-	// The radii of each bubble is interpolated linearly
-	// between these two values.
-	MinRadius, MaxRadius vg.Length
-
-	// MinZ and MaxZ are the minimum and
-	// maximum Z values from the data.
-	MinZ, MaxZ float64
-}
-
-// NewBubbles creates as new bubble plot plotter for
-// the given data, with a minimum and maximum
-// bubble radius.
-func NewBubbles(xyz XYZer, min, max vg.Length) (*Bubbles, error) {
-	cpy, err := CopyXYZs(xyz)
-	if err != nil {
-		return nil, err
-	}
-	if min > max {
-		return nil, errors.New("Min bubble radius is greater than the max radius")
-	}
-	minz := cpy[0].Z
-	maxz := cpy[0].Z
-	for _, d := range cpy {
-		minz = math.Min(minz, d.Z)
-		maxz = math.Max(maxz, d.Z)
-	}
-	return &Bubbles{
-		XYZs:      cpy,
-		MinRadius: min,
-		MaxRadius: max,
-		MinZ:      minz,
-		MaxZ:      maxz,
-	}, nil
-}
-
-// Plot implements the Plot method of the plot.Plotter interface.
-func (bs *Bubbles) Plot(c draw.Canvas, plt *plot.Plot) {
-	trX, trY := plt.Transforms(&c)
-
-	c.SetColor(bs.Color)
-
-	for _, d := range bs.XYZs {
-		x := trX(d.X)
-		y := trY(d.Y)
-		pt := vg.Point{X: x, Y: y}
-		if !c.Contains(pt) {
-			continue
-		}
-
-		rad := bs.radius(d.Z)
-
-		// draw a circle centered at x, y
-		var p vg.Path
-		p.Move(vg.Point{X: x + rad, Y: y})
-		p.Arc(pt, rad, 0, 2*math.Pi)
-		p.Close()
-		c.Fill(p)
-	}
-}
-
-// radius returns the radius of a bubble by linear interpolation.
-func (bs *Bubbles) radius(z float64) vg.Length {
-	rng := bs.MaxRadius - bs.MinRadius
-	if bs.MaxZ == bs.MinZ {
-		return rng/2 + bs.MinRadius
-	}
-	d := (z - bs.MinZ) / (bs.MaxZ - bs.MinZ)
-	return vg.Length(d)*rng + bs.MinRadius
-}
-
-// DataRange implements the DataRange method
-// of the plot.DataRanger interface.
-func (bs *Bubbles) DataRange() (xmin, xmax, ymin, ymax float64) {
-	return XYRange(XYValues{bs.XYZs})
-}
-
-// GlyphBoxes implements the GlyphBoxes method
-// of the plot.GlyphBoxer interface.
-func (bs *Bubbles) GlyphBoxes(plt *plot.Plot) []plot.GlyphBox {
-	boxes := make([]plot.GlyphBox, len(bs.XYZs))
-	for i, d := range bs.XYZs {
-		boxes[i].X = plt.X.Norm(d.X)
-		boxes[i].Y = plt.Y.Norm(d.Y)
-		r := bs.radius(d.Z)
-		boxes[i].Rectangle = vg.Rectangle{
-			Min: vg.Point{X: -r, Y: -r},
-			Max: vg.Point{X: +r, Y: +r},
-		}
-	}
-	return boxes
-}
--- a/vendor/gonum.org/v1/gonum/LICENSE
+++ b/vendor/gonum.org/v1/gonum/LICENSE
@ -0,0 +1,23 @@
+Copyright ©2013 The Gonum Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the gonum project nor the names of its authors and
+      contributors may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/gonum.org/v1/gonum/blas/blas.go
+++ b/vendor/gonum.org/v1/gonum/blas/blas.go
@ -1,110 +1,9 @@
-// Copyright ©2013 The gonum Authors. All rights reserved.
+// Copyright ©2013 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-/*
-Package blas provides interfaces for the BLAS linear algebra standard.
+//go:generate ./conversions.bash

-All methods must perform appropriate parameter checking and panic if
-provided parameters that do not conform to the requirements specified
-by the BLAS standard.
-
-Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
-
-This version is modified to remove the "order" option. All matrix operations are
-on row-order matrices.
-
-Level 1 BLAS
-
-	        dim scalar vector   vector   scalars              5-element prefixes
-	                                                          struct
-
-	_rotg (                                      a, b )                S, D
-	_rotmg(                              d1, d2, a, b )                S, D
-	_rot  ( n,         x, incX, y, incY,               c, s )          S, D
-	_rotm ( n,         x, incX, y, incY,                      param )  S, D
-	_swap ( n,         x, incX, y, incY )                              S, D, C, Z
-	_scal ( n,  alpha, x, incX )                                       S, D, C, Z, Cs, Zd
-	_copy ( n,         x, incX, y, incY )                              S, D, C, Z
-	_axpy ( n,  alpha, x, incX, y, incY )                              S, D, C, Z
-	_dot  ( n,         x, incX, y, incY )                              S, D, Ds
-	_dotu ( n,         x, incX, y, incY )                              C, Z
-	_dotc ( n,         x, incX, y, incY )                              C, Z
-	__dot ( n,  alpha, x, incX, y, incY )                              Sds
-	_nrm2 ( n,         x, incX )                                       S, D, Sc, Dz
-	_asum ( n,         x, incX )                                       S, D, Sc, Dz
-	I_amax( n,         x, incX )                                       s, d, c, z
-
-Level 2 BLAS
-
-	        options                   dim   b-width scalar matrix  vector   scalar vector   prefixes
-
-	_gemv (        trans,      m, n,         alpha, a, lda, x, incX, beta,  y, incY ) S, D, C, Z
-	_gbmv (        trans,      m, n, kL, kU, alpha, a, lda, x, incX, beta,  y, incY ) S, D, C, Z
-	_hemv ( uplo,                 n,         alpha, a, lda, x, incX, beta,  y, incY ) C, Z
-	_hbmv ( uplo,                 n, k,      alpha, a, lda, x, incX, beta,  y, incY ) C, Z
-	_hpmv ( uplo,                 n,         alpha, ap,     x, incX, beta,  y, incY ) C, Z
-	_symv ( uplo,                 n,         alpha, a, lda, x, incX, beta,  y, incY ) S, D
-	_sbmv ( uplo,                 n, k,      alpha, a, lda, x, incX, beta,  y, incY ) S, D
-	_spmv ( uplo,                 n,         alpha, ap,     x, incX, beta,  y, incY ) S, D
-	_trmv ( uplo, trans, diag,    n,                a, lda, x, incX )                 S, D, C, Z
-	_tbmv ( uplo, trans, diag,    n, k,             a, lda, x, incX )                 S, D, C, Z
-	_tpmv ( uplo, trans, diag,    n,                ap,     x, incX )                 S, D, C, Z
-	_trsv ( uplo, trans, diag,    n,                a, lda, x, incX )                 S, D, C, Z
-	_tbsv ( uplo, trans, diag,    n, k,             a, lda, x, incX )                 S, D, C, Z
-	_tpsv ( uplo, trans, diag,    n,                ap,     x, incX )                 S, D, C, Z
-
-	        options                   dim   scalar vector   vector   matrix  prefixes
-
-	_ger  (                    m, n, alpha, x, incX, y, incY, a, lda ) S, D
-	_geru (                    m, n, alpha, x, incX, y, incY, a, lda ) C, Z
-	_gerc (                    m, n, alpha, x, incX, y, incY, a, lda ) C, Z
-	_her  ( uplo,                 n, alpha, x, incX,          a, lda ) C, Z
-	_hpr  ( uplo,                 n, alpha, x, incX,          ap )     C, Z
-	_her2 ( uplo,                 n, alpha, x, incX, y, incY, a, lda ) C, Z
-	_hpr2 ( uplo,                 n, alpha, x, incX, y, incY, ap )     C, Z
-	_syr  ( uplo,                 n, alpha, x, incX,          a, lda ) S, D
-	_spr  ( uplo,                 n, alpha, x, incX,          ap )     S, D
-	_syr2 ( uplo,                 n, alpha, x, incX, y, incY, a, lda ) S, D
-	_spr2 ( uplo,                 n, alpha, x, incX, y, incY, ap )     S, D
-
-Level 3 BLAS
-
-	        options                                 dim      scalar matrix  matrix  scalar matrix  prefixes
-
-	_gemm (             transA, transB,      m, n, k, alpha, a, lda, b, ldb, beta,  c, ldc ) S, D, C, Z
-	_symm ( side, uplo,                      m, n,    alpha, a, lda, b, ldb, beta,  c, ldc ) S, D, C, Z
-	_hemm ( side, uplo,                      m, n,    alpha, a, lda, b, ldb, beta,  c, ldc ) C, Z
-	_syrk (       uplo, trans,                  n, k, alpha, a, lda,         beta,  c, ldc ) S, D, C, Z
-	_herk (       uplo, trans,                  n, k, alpha, a, lda,         beta,  c, ldc ) C, Z
-	_syr2k(       uplo, trans,                  n, k, alpha, a, lda, b, ldb, beta,  c, ldc ) S, D, C, Z
-	_her2k(       uplo, trans,                  n, k, alpha, a, lda, b, ldb, beta,  c, ldc ) C, Z
-	_trmm ( side, uplo, transA,        diag, m, n,    alpha, a, lda, b, ldb )                S, D, C, Z
-	_trsm ( side, uplo, transA,        diag, m, n,    alpha, a, lda, b, ldb )                S, D, C, Z
-
-Meaning of prefixes
-
-	S - float32	C - complex64
-	D - float64	Z - complex128
-
-Matrix types
-
-	GE - GEneral 		GB - General Band
-	SY - SYmmetric 		SB - Symmetric Band 	SP - Symmetric Packed
-	HE - HErmitian 		HB - Hermitian Band 	HP - Hermitian Packed
-	TR - TRiangular 	TB - Triangular Band 	TP - Triangular Packed
-
-Options
-
-	trans 	= NoTrans, Trans, ConjTrans
-	uplo 	= Upper, Lower
-	diag 	= Nonunit, Unit
-	side 	= Left, Right (A or op(A) on the left, or A or op(A) on the right)
-
-For real matrices, Trans and ConjTrans have the same meaning.
-For Hermitian matrices, trans = Trans is not allowed.
-For complex symmetric matrices, trans = ConjTrans is not allowed.
-*/
 package blas

 // Flag constants indicate Givens transformation H matrix state.
--- a/vendor/gonum.org/v1/gonum/blas/blas64/blas64.go
+++ b/vendor/gonum.org/v1/gonum/blas/blas64/blas64.go
@ -1,16 +1,15 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// Package blas64 provides a simple interface to the float64 BLAS API.
 package blas64

 import (
-	"github.com/gonum/blas"
-	"github.com/gonum/blas/native"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/gonum"
 )

-var blas64 blas.Float64 = native.Implementation{}
+var blas64 blas.Float64 = gonum.Implementation{}

 // Use sets the BLAS float64 implementation to be used by subsequent BLAS calls.
 // The default implementation is native.Implementation.
--- a/vendor/gonum.org/v1/gonum/blas/blas64/conv.go
+++ b/vendor/gonum.org/v1/gonum/blas/blas64/conv.go
@ -0,0 +1,277 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package blas64
+
+import "gonum.org/v1/gonum/blas"
+
+// GeneralCols represents a matrix using the conventional column-major storage scheme.
+type GeneralCols General
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions as a and have adequate backing
+// data storage.
+func (t GeneralCols) From(a General) {
+	if t.Rows != a.Rows || t.Cols != a.Cols {
+		panic("blas64: mismatched dimension")
+	}
+	if len(t.Data) < (t.Cols-1)*t.Stride+t.Rows {
+		panic("blas64: short data slice")
+	}
+	for i := 0; i < a.Rows; i++ {
+		for j, v := range a.Data[i*a.Stride : i*a.Stride+a.Cols] {
+			t.Data[i+j*t.Stride] = v
+		}
+	}
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions as a and have adequate backing
+// data storage.
+func (t General) From(a GeneralCols) {
+	if t.Rows != a.Rows || t.Cols != a.Cols {
+		panic("blas64: mismatched dimension")
+	}
+	if len(t.Data) < (t.Rows-1)*t.Stride+t.Cols {
+		panic("blas64: short data slice")
+	}
+	for j := 0; j < a.Cols; j++ {
+		for i, v := range a.Data[j*a.Stride : j*a.Stride+a.Rows] {
+			t.Data[i*t.Stride+j] = v
+		}
+	}
+}
+
+// TriangularCols represents a matrix using the conventional column-major storage scheme.
+type TriangularCols Triangular
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, uplo and diag as a and have
+// adequate backing data storage.
+func (t TriangularCols) From(a Triangular) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	if t.Diag != a.Diag {
+		panic("blas64: mismatched BLAS diag")
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		for i := 0; i < a.N; i++ {
+			for j := i; j < a.N; j++ {
+				t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
+			}
+		}
+	case blas.Lower:
+		for i := 0; i < a.N; i++ {
+			for j := 0; j <= i; j++ {
+				t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
+			}
+		}
+	case blas.All:
+		for i := 0; i < a.N; i++ {
+			for j := 0; j < a.N; j++ {
+				t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
+			}
+		}
+	}
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, uplo and diag as a and have
+// adequate backing data storage.
+func (t Triangular) From(a TriangularCols) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	if t.Diag != a.Diag {
+		panic("blas64: mismatched BLAS diag")
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		for i := 0; i < a.N; i++ {
+			for j := i; j < a.N; j++ {
+				t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+			}
+		}
+	case blas.Lower:
+		for i := 0; i < a.N; i++ {
+			for j := 0; j <= i; j++ {
+				t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+			}
+		}
+	case blas.All:
+		for i := 0; i < a.N; i++ {
+			for j := 0; j < a.N; j++ {
+				t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+			}
+		}
+	}
+}
+
+// BandCols represents a matrix using the band column-major storage scheme.
+type BandCols Band
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and bandwidth as a and have
+// adequate backing data storage.
+func (t BandCols) From(a Band) {
+	if t.Rows != a.Rows || t.Cols != a.Cols {
+		panic("blas64: mismatched dimension")
+	}
+	if t.KL != a.KL || t.KU != a.KU {
+		panic("blas64: mismatched bandwidth")
+	}
+	if a.Stride < a.KL+a.KU+1 {
+		panic("blas64: short stride for source")
+	}
+	if t.Stride < t.KL+t.KU+1 {
+		panic("blas64: short stride for destination")
+	}
+	for i := 0; i < a.Rows; i++ {
+		for j := max(0, i-a.KL); j < min(i+a.KU+1, a.Cols); j++ {
+			t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
+		}
+	}
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and bandwidth as a and have
+// adequate backing data storage.
+func (t Band) From(a BandCols) {
+	if t.Rows != a.Rows || t.Cols != a.Cols {
+		panic("blas64: mismatched dimension")
+	}
+	if t.KL != a.KL || t.KU != a.KU {
+		panic("blas64: mismatched bandwidth")
+	}
+	if a.Stride < a.KL+a.KU+1 {
+		panic("blas64: short stride for source")
+	}
+	if t.Stride < t.KL+t.KU+1 {
+		panic("blas64: short stride for destination")
+	}
+	for j := 0; j < a.Cols; j++ {
+		for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
+			t.Data[j+a.KL-i+i*a.Stride] = a.Data[i+t.KU-j+j*t.Stride]
+		}
+	}
+}
+
+// TriangularBandCols represents a symmetric matrix using the band column-major storage scheme.
+type TriangularBandCols TriangularBand
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t TriangularBandCols) From(a TriangularBand) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.K != a.K {
+		panic("blas64: mismatched bandwidth")
+	}
+	if a.Stride < a.K+1 {
+		panic("blas64: short stride for source")
+	}
+	if t.Stride < t.K+1 {
+		panic("blas64: short stride for destination")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	if t.Diag != a.Diag {
+		panic("blas64: mismatched BLAS diag")
+	}
+	dst := BandCols{
+		Rows: t.N, Cols: t.N,
+		Stride: t.Stride,
+		Data:   t.Data,
+	}
+	src := Band{
+		Rows: a.N, Cols: a.N,
+		Stride: a.Stride,
+		Data:   a.Data,
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		dst.KU = t.K
+		src.KU = a.K
+	case blas.Lower:
+		dst.KL = t.K
+		src.KL = a.K
+	}
+	dst.From(src)
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t TriangularBand) From(a TriangularBandCols) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.K != a.K {
+		panic("blas64: mismatched bandwidth")
+	}
+	if a.Stride < a.K+1 {
+		panic("blas64: short stride for source")
+	}
+	if t.Stride < t.K+1 {
+		panic("blas64: short stride for destination")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	if t.Diag != a.Diag {
+		panic("blas64: mismatched BLAS diag")
+	}
+	dst := Band{
+		Rows: t.N, Cols: t.N,
+		Stride: t.Stride,
+		Data:   t.Data,
+	}
+	src := BandCols{
+		Rows: a.N, Cols: a.N,
+		Stride: a.Stride,
+		Data:   a.Data,
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		dst.KU = t.K
+		src.KU = a.K
+	case blas.Lower:
+		dst.KL = t.K
+		src.KL = a.K
+	}
+	dst.From(src)
+}
+
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+func max(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}
--- a/vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go
+++ b/vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go
@ -0,0 +1,153 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package blas64
+
+import "gonum.org/v1/gonum/blas"
+
+// SymmetricCols represents a matrix using the conventional column-major storage scheme.
+type SymmetricCols Symmetric
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and uplo as a and have adequate
+// backing data storage.
+func (t SymmetricCols) From(a Symmetric) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		for i := 0; i < a.N; i++ {
+			for j := i; j < a.N; j++ {
+				t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
+			}
+		}
+	case blas.Lower:
+		for i := 0; i < a.N; i++ {
+			for j := 0; j <= i; j++ {
+				t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
+			}
+		}
+	}
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions and uplo as a and have adequate
+// backing data storage.
+func (t Symmetric) From(a SymmetricCols) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		for i := 0; i < a.N; i++ {
+			for j := i; j < a.N; j++ {
+				t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+			}
+		}
+	case blas.Lower:
+		for i := 0; i < a.N; i++ {
+			for j := 0; j <= i; j++ {
+				t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
+			}
+		}
+	}
+}
+
+// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme.
+type SymmetricBandCols SymmetricBand
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t SymmetricBandCols) From(a SymmetricBand) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.K != a.K {
+		panic("blas64: mismatched bandwidth")
+	}
+	if a.Stride < a.K+1 {
+		panic("blas64: short stride for source")
+	}
+	if t.Stride < t.K+1 {
+		panic("blas64: short stride for destination")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	dst := BandCols{
+		Rows: t.N, Cols: t.N,
+		Stride: t.Stride,
+		Data:   t.Data,
+	}
+	src := Band{
+		Rows: a.N, Cols: a.N,
+		Stride: a.Stride,
+		Data:   a.Data,
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		dst.KU = t.K
+		src.KU = a.K
+	case blas.Lower:
+		dst.KL = t.K
+		src.KL = a.K
+	}
+	dst.From(src)
+}
+
+// From fills the receiver with elements from a. The receiver
+// must have the same dimensions, bandwidth and uplo as a and
+// have adequate backing data storage.
+func (t SymmetricBand) From(a SymmetricBandCols) {
+	if t.N != a.N {
+		panic("blas64: mismatched dimension")
+	}
+	if t.K != a.K {
+		panic("blas64: mismatched bandwidth")
+	}
+	if a.Stride < a.K+1 {
+		panic("blas64: short stride for source")
+	}
+	if t.Stride < t.K+1 {
+		panic("blas64: short stride for destination")
+	}
+	if t.Uplo != a.Uplo {
+		panic("blas64: mismatched BLAS uplo")
+	}
+	dst := Band{
+		Rows: t.N, Cols: t.N,
+		Stride: t.Stride,
+		Data:   t.Data,
+	}
+	src := BandCols{
+		Rows: a.N, Cols: a.N,
+		Stride: a.Stride,
+		Data:   a.Data,
+	}
+	switch a.Uplo {
+	default:
+		panic("blas64: bad BLAS uplo")
+	case blas.Upper:
+		dst.KU = t.K
+		src.KU = a.K
+	case blas.Lower:
+		dst.KL = t.K
+		src.KL = a.K
+	}
+	dst.From(src)
+}
--- a/vendor/gonum.org/v1/gonum/blas/blas64/doc.go
+++ b/vendor/gonum.org/v1/gonum/blas/blas64/doc.go
@ -0,0 +1,6 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package blas64 provides a simple interface to the float64 BLAS API.
+package blas64 // import "gonum.org/v1/gonum/blas/blas64"
--- a/vendor/gonum.org/v1/gonum/blas/doc.go
+++ b/vendor/gonum.org/v1/gonum/blas/doc.go
@ -0,0 +1,108 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package blas provides interfaces for the BLAS linear algebra standard.
+
+All methods must perform appropriate parameter checking and panic if
+provided parameters that do not conform to the requirements specified
+by the BLAS standard.
+
+Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
+
+This version is modified to remove the "order" option. All matrix operations are
+on row-order matrices.
+
+Level 1 BLAS
+
+	        dim scalar vector   vector   scalars              5-element prefixes
+	                                                          struct
+
+	_rotg (                                      a, b )                S, D
+	_rotmg(                              d1, d2, a, b )                S, D
+	_rot  ( n,         x, incX, y, incY,               c, s )          S, D
+	_rotm ( n,         x, incX, y, incY,                      param )  S, D
+	_swap ( n,         x, incX, y, incY )                              S, D, C, Z
+	_scal ( n,  alpha, x, incX )                                       S, D, C, Z, Cs, Zd
+	_copy ( n,         x, incX, y, incY )                              S, D, C, Z
+	_axpy ( n,  alpha, x, incX, y, incY )                              S, D, C, Z
+	_dot  ( n,         x, incX, y, incY )                              S, D, Ds
+	_dotu ( n,         x, incX, y, incY )                              C, Z
+	_dotc ( n,         x, incX, y, incY )                              C, Z
+	__dot ( n,  alpha, x, incX, y, incY )                              Sds
+	_nrm2 ( n,         x, incX )                                       S, D, Sc, Dz
+	_asum ( n,         x, incX )                                       S, D, Sc, Dz
+	I_amax( n,         x, incX )                                       s, d, c, z
+
+Level 2 BLAS
+
+	        options                   dim   b-width scalar matrix  vector   scalar vector   prefixes
+
+	_gemv (        trans,      m, n,         alpha, a, lda, x, incX, beta,  y, incY ) S, D, C, Z
+	_gbmv (        trans,      m, n, kL, kU, alpha, a, lda, x, incX, beta,  y, incY ) S, D, C, Z
+	_hemv ( uplo,                 n,         alpha, a, lda, x, incX, beta,  y, incY ) C, Z
+	_hbmv ( uplo,                 n, k,      alpha, a, lda, x, incX, beta,  y, incY ) C, Z
+	_hpmv ( uplo,                 n,         alpha, ap,     x, incX, beta,  y, incY ) C, Z
+	_symv ( uplo,                 n,         alpha, a, lda, x, incX, beta,  y, incY ) S, D
+	_sbmv ( uplo,                 n, k,      alpha, a, lda, x, incX, beta,  y, incY ) S, D
+	_spmv ( uplo,                 n,         alpha, ap,     x, incX, beta,  y, incY ) S, D
+	_trmv ( uplo, trans, diag,    n,                a, lda, x, incX )                 S, D, C, Z
+	_tbmv ( uplo, trans, diag,    n, k,             a, lda, x, incX )                 S, D, C, Z
+	_tpmv ( uplo, trans, diag,    n,                ap,     x, incX )                 S, D, C, Z
+	_trsv ( uplo, trans, diag,    n,                a, lda, x, incX )                 S, D, C, Z
+	_tbsv ( uplo, trans, diag,    n, k,             a, lda, x, incX )                 S, D, C, Z
+	_tpsv ( uplo, trans, diag,    n,                ap,     x, incX )                 S, D, C, Z
+
+	        options                   dim   scalar vector   vector   matrix  prefixes
+
+	_ger  (                    m, n, alpha, x, incX, y, incY, a, lda ) S, D
+	_geru (                    m, n, alpha, x, incX, y, incY, a, lda ) C, Z
+	_gerc (                    m, n, alpha, x, incX, y, incY, a, lda ) C, Z
+	_her  ( uplo,                 n, alpha, x, incX,          a, lda ) C, Z
+	_hpr  ( uplo,                 n, alpha, x, incX,          ap )     C, Z
+	_her2 ( uplo,                 n, alpha, x, incX, y, incY, a, lda ) C, Z
+	_hpr2 ( uplo,                 n, alpha, x, incX, y, incY, ap )     C, Z
+	_syr  ( uplo,                 n, alpha, x, incX,          a, lda ) S, D
+	_spr  ( uplo,                 n, alpha, x, incX,          ap )     S, D
+	_syr2 ( uplo,                 n, alpha, x, incX, y, incY, a, lda ) S, D
+	_spr2 ( uplo,                 n, alpha, x, incX, y, incY, ap )     S, D
+
+Level 3 BLAS
+
+	        options                                 dim      scalar matrix  matrix  scalar matrix  prefixes
+
+	_gemm (             transA, transB,      m, n, k, alpha, a, lda, b, ldb, beta,  c, ldc ) S, D, C, Z
+	_symm ( side, uplo,                      m, n,    alpha, a, lda, b, ldb, beta,  c, ldc ) S, D, C, Z
+	_hemm ( side, uplo,                      m, n,    alpha, a, lda, b, ldb, beta,  c, ldc ) C, Z
+	_syrk (       uplo, trans,                  n, k, alpha, a, lda,         beta,  c, ldc ) S, D, C, Z
+	_herk (       uplo, trans,                  n, k, alpha, a, lda,         beta,  c, ldc ) C, Z
+	_syr2k(       uplo, trans,                  n, k, alpha, a, lda, b, ldb, beta,  c, ldc ) S, D, C, Z
+	_her2k(       uplo, trans,                  n, k, alpha, a, lda, b, ldb, beta,  c, ldc ) C, Z
+	_trmm ( side, uplo, transA,        diag, m, n,    alpha, a, lda, b, ldb )                S, D, C, Z
+	_trsm ( side, uplo, transA,        diag, m, n,    alpha, a, lda, b, ldb )                S, D, C, Z
+
+Meaning of prefixes
+
+	S - float32	C - complex64
+	D - float64	Z - complex128
+
+Matrix types
+
+	GE - GEneral 		GB - General Band
+	SY - SYmmetric 		SB - Symmetric Band 	SP - Symmetric Packed
+	HE - HErmitian 		HB - Hermitian Band 	HP - Hermitian Packed
+	TR - TRiangular 	TB - Triangular Band 	TP - Triangular Packed
+
+Options
+
+	trans 	= NoTrans, Trans, ConjTrans
+	uplo 	= Upper, Lower
+	diag 	= Nonunit, Unit
+	side 	= Left, Right (A or op(A) on the left, or A or op(A) on the right)
+
+For real matrices, Trans and ConjTrans have the same meaning.
+For Hermitian matrices, trans = Trans is not allowed.
+For complex symmetric matrices, trans = ConjTrans is not allowed.
+*/
+package blas // import "gonum.org/v1/gonum/blas"
--- a/vendor/gonum.org/v1/gonum/blas/gonum/cmplx.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/cmplx.go
@ -0,0 +1,185 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+var (
+	_ blas.Complex64  = Implementation{}
+	_ blas.Complex128 = Implementation{}
+)
+
+// TODO(btracey): Replace this as complex routines are added, and instead
+// automatically generate the complex64 routines from the complex128 ones.
+
+var noComplex = "native: implementation does not implement this routine, see the cgo wrapper in gonum.org/v1/netlib/blas"
+
+// Level 1 complex64 routines.
+
+func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) {
+	panic(noComplex)
+}
+func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) {
+	panic(noComplex)
+}
+func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 {
+	panic(noComplex)
+}
+func (Implementation) Scasum(n int, x []complex64, incX int) float32 {
+	panic(noComplex)
+}
+func (Implementation) Icamax(n int, x []complex64, incX int) int {
+	panic(noComplex)
+}
+func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) {
+	panic(noComplex)
+}
+
+// Level 2 complex64 routines.
+
+func (Implementation) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Cgbmv(tA blas.Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Chpmv(ul blas.Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
+	panic(noComplex)
+}
+func (Implementation) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
+	panic(noComplex)
+}
+func (Implementation) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) {
+	panic(noComplex)
+}
+func (Implementation) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64) {
+	panic(noComplex)
+}
+func (Implementation) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
+	panic(noComplex)
+}
+func (Implementation) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) {
+	panic(noComplex)
+}
+
+// Level 3 complex64 routines.
+
+func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
+	panic(noComplex)
+}
+func (Implementation) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
+	panic(noComplex)
+}
+func (Implementation) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
+	panic(noComplex)
+}
+
+// Level 2 complex128 routines.
+
+func (Implementation) Zgbmv(tA blas.Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
+	panic(noComplex)
+}
+func (Implementation) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
+	panic(noComplex)
+}
+func (Implementation) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
+	panic(noComplex)
+}
+
+// Level 3 complex128 routines.
+
+func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
+	panic(noComplex)
+}
+func (Implementation) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
+	panic(noComplex)
+}
+func (Implementation) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) {
+	panic(noComplex)
+}
+func (Implementation) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) {
+	panic(noComplex)
+}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/dgemm.go
@ -1,15 +1,15 @@
-// Copyright ©2014 The gonum Authors. All rights reserved.
+// Copyright ©2014 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
 	"runtime"
 	"sync"

-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f64"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f64"
 )

 // Dgemm computes
@ -25,17 +25,17 @@ func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a
 	}
 	aTrans := tA == blas.Trans || tA == blas.ConjTrans
 	if aTrans {
-		checkMatrix64(k, m, a, lda)
+		checkDMatrix('a', k, m, a, lda)
 	} else {
-		checkMatrix64(m, k, a, lda)
+		checkDMatrix('a', m, k, a, lda)
 	}
 	bTrans := tB == blas.Trans || tB == blas.ConjTrans
 	if bTrans {
-		checkMatrix64(n, k, b, ldb)
+		checkDMatrix('b', n, k, b, ldb)
 	} else {
-		checkMatrix64(k, n, b, ldb)
+		checkDMatrix('b', k, n, b, ldb)
 	}
-	checkMatrix64(m, n, c, ldc)
+	checkDMatrix('c', m, n, c, ldc)

 	// scale c
 	if beta != 1 {
@ -121,7 +121,7 @@ func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []f
 		go func() {
 			defer wg.Done()
 			// Make local copies of otherwise global variables to reduce shared memory.
-			// This has a noticable effect on benchmarks in some cases.
+			// This has a noticeable effect on benchmarks in some cases.
 			alpha := alpha
 			aTrans := aTrans
 			bTrans := bTrans
@ -259,18 +259,3 @@ func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb i
 func sliceView64(a []float64, lda, i, j, r, c int) []float64 {
 	return a[i*lda+j : (i+r-1)*lda+j+c]
 }
-
-func checkMatrix64(m, n int, a []float64, lda int) {
-	if m < 0 {
-		panic("blas: rows < 0")
-	}
-	if n < 0 {
-		panic("blas: cols < 0")
-	}
-	if lda < n {
-		panic("blas: illegal stride")
-	}
-	if len(a) < (m-1)*lda+n {
-		panic("blas: insufficient matrix slice length")
-	}
-}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/doc.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/doc.go
@ -7,12 +7,12 @@
 /*
 Package native is a Go implementation of the BLAS API. This implementation
 panics when the input arguments are invalid as per the standard, for example
-if a vector increment is zero. Please note that the treatment of NaN values
+if a vector increment is zero. Note that the treatment of NaN values
 is not specified, and differs among the BLAS implementations.
-github.com/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
+gonum.org/v1/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
 interface. The rest of this text describes the layout of the data for the input types.

-Please note that in the function documentation, x[i] refers to the i^th element
+Note that in the function documentation, x[i] refers to the i^th element
 of the vector, which will be different from the i^th element of the slice if
 incX != 1.

@ -85,4 +85,4 @@ which is given to the BLAS routine as [∗ 1 2 3 4 ...].
 See http://www.crest.iu.edu/research/mtl/reference/html/banded.html
 for more information
 */
-package native
+package gonum // import "gonum.org/v1/gonum/blas/gonum"
--- a/vendor/gonum.org/v1/gonum/blas/gonum/general_double.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/general_double.go
@ -0,0 +1,50 @@
+// Copyright ©2014 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+)
+
+type general64 struct {
+	data       []float64
+	rows, cols int
+	stride     int
+}
+
+func (g general64) clone() general64 {
+	data := make([]float64, len(g.data))
+	copy(data, g.data)
+	return general64{
+		data:   data,
+		rows:   g.rows,
+		cols:   g.cols,
+		stride: g.stride,
+	}
+}
+
+func (g general64) equal(a general64) bool {
+	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
+		return false
+	}
+	for i, v := range g.data {
+		if a.data[i] != v {
+			return false
+		}
+	}
+	return true
+}
+
+func (g general64) equalWithinAbs(a general64, tol float64) bool {
+	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
+		return false
+	}
+	for i, v := range g.data {
+		if math.Abs(a.data[i]-v) > tol {
+			return false
+		}
+	}
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/general_single.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/general_single.go
@ -0,0 +1,52 @@
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
+
+// Copyright ©2014 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	math "gonum.org/v1/gonum/internal/math32"
+)
+
+type general32 struct {
+	data       []float32
+	rows, cols int
+	stride     int
+}
+
+func (g general32) clone() general32 {
+	data := make([]float32, len(g.data))
+	copy(data, g.data)
+	return general32{
+		data:   data,
+		rows:   g.rows,
+		cols:   g.cols,
+		stride: g.stride,
+	}
+}
+
+func (g general32) equal(a general32) bool {
+	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
+		return false
+	}
+	for i, v := range g.data {
+		if a.data[i] != v {
+			return false
+		}
+	}
+	return true
+}
+
+func (g general32) equalWithinAbs(a general32, tol float32) bool {
+	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
+		return false
+	}
+	for i, v := range g.data {
+		if math.Abs(a.data[i]-v) > tol {
+			return false
+		}
+	}
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/gonum.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/gonum.go
@ -1,10 +1,12 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 //go:generate ./single_precision.bash

-package native
+package gonum
+
+import "math"

 type Implementation struct{}

@ -43,9 +45,6 @@ const (
 	buffMul     = 4  // how big is the buffer relative to the number of workers
 )

-// [SD]gemm debugging constant.
-const debug = false
-
 // subMul is a common type shared by [SD]gemm.
 type subMul struct {
 	i, j int // index of block
@ -65,8 +64,70 @@ func min(a, b int) int {
 	return a
 }

-// blocks returns the number of divisons of the dimension length with the given
+func checkSMatrix(name byte, m, n int, a []float32, lda int) {
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < n {
+		panic("blas: illegal stride of " + string(name))
+	}
+	if len(a) < (m-1)*lda+n {
+		panic("blas: index of " + string(name) + " out of range")
+	}
+}
+
+func checkDMatrix(name byte, m, n int, a []float64, lda int) {
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < n {
+		panic("blas: illegal stride of " + string(name))
+	}
+	if len(a) < (m-1)*lda+n {
+		panic("blas: index of " + string(name) + " out of range")
+	}
+}
+
+func checkZMatrix(name byte, m, n int, a []complex128, lda int) {
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic("blas: illegal stride of " + string(name))
+	}
+	if len(a) < (m-1)*lda+n {
+		panic("blas: insufficient " + string(name) + " matrix slice length")
+	}
+}
+
+func checkZVector(name byte, n int, x []complex128, incX int) {
+	if n < 0 {
+		panic(nLT0)
+	}
+	if incX == 0 {
+		panic(zeroIncX)
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic("blas: insufficient " + string(name) + " vector slice length")
+	}
+}
+
+// blocks returns the number of divisions of the dimension length with the given
 // block size.
 func blocks(dim, bsize int) int {
 	return (dim + bsize - 1) / bsize
 }
+
+// dcabs1 returns |real(z)|+|imag(z)|.
+func dcabs1(z complex128) float64 {
+	return math.Abs(real(z)) + math.Abs(imag(z))
+}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go
@ -0,0 +1,442 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/internal/asm/c128"
+)
+
+// Dzasum returns the sum of the absolute values of the elements of x
+//  \sum_i |Re(x[i])| + |Im(x[i])|
+// Dzasum returns 0 if incX is negative.
+func (Implementation) Dzasum(n int, x []complex128, incX int) float64 {
+	if n < 0 {
+		panic(negativeN)
+	}
+	if incX < 1 {
+		if incX == 0 {
+			panic(zeroIncX)
+		}
+		return 0
+	}
+	var sum float64
+	if incX == 1 {
+		if len(x) < n {
+			panic(badX)
+		}
+		for _, v := range x[:n] {
+			sum += dcabs1(v)
+		}
+		return sum
+	}
+	if (n-1)*incX >= len(x) {
+		panic(badX)
+	}
+	for i := 0; i < n; i++ {
+		v := x[i*incX]
+		sum += dcabs1(v)
+	}
+	return sum
+}
+
+// Dznrm2 computes the Euclidean norm of the complex vector x,
+//  ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])).
+// This function returns 0 if incX is negative.
+func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 {
+	if incX < 1 {
+		if incX == 0 {
+			panic(zeroIncX)
+		}
+		return 0
+	}
+	if n < 1 {
+		if n == 0 {
+			return 0
+		}
+		panic(negativeN)
+	}
+	if (n-1)*incX >= len(x) {
+		panic(badX)
+	}
+	var (
+		scale float64
+		ssq   float64 = 1
+	)
+	if incX == 1 {
+		for _, v := range x[:n] {
+			re, im := math.Abs(real(v)), math.Abs(imag(v))
+			if re != 0 {
+				if re > scale {
+					ssq = 1 + ssq*(scale/re)*(scale/re)
+					scale = re
+				} else {
+					ssq += (re / scale) * (re / scale)
+				}
+			}
+			if im != 0 {
+				if im > scale {
+					ssq = 1 + ssq*(scale/im)*(scale/im)
+					scale = im
+				} else {
+					ssq += (im / scale) * (im / scale)
+				}
+			}
+		}
+		if math.IsInf(scale, 1) {
+			return math.Inf(1)
+		}
+		return scale * math.Sqrt(ssq)
+	}
+	for ix := 0; ix < n*incX; ix += incX {
+		re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix]))
+		if re != 0 {
+			if re > scale {
+				ssq = 1 + ssq*(scale/re)*(scale/re)
+				scale = re
+			} else {
+				ssq += (re / scale) * (re / scale)
+			}
+		}
+		if im != 0 {
+			if im > scale {
+				ssq = 1 + ssq*(scale/im)*(scale/im)
+				scale = im
+			} else {
+				ssq += (im / scale) * (im / scale)
+			}
+		}
+	}
+	if math.IsInf(scale, 1) {
+		return math.Inf(1)
+	}
+	return scale * math.Sqrt(ssq)
+}
+
+// Izamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|.
+// Izamax returns -1 if n is 0 or incX is negative.
+func (Implementation) Izamax(n int, x []complex128, incX int) int {
+	if incX < 1 {
+		if incX == 0 {
+			panic(zeroIncX)
+		}
+		// Return invalid index.
+		return -1
+	}
+	if n < 1 {
+		if n == 0 {
+			// Return invalid index.
+			return -1
+		}
+		panic(negativeN)
+	}
+	if len(x) <= (n-1)*incX {
+		panic(badX)
+	}
+	idx := 0
+	max := dcabs1(x[0])
+	if incX == 1 {
+		for i, v := range x[1:n] {
+			absV := dcabs1(v)
+			if absV > max {
+				max = absV
+				idx = i + 1
+			}
+		}
+		return idx
+	}
+	ix := incX
+	for i := 1; i < n; i++ {
+		absV := dcabs1(x[ix])
+		if absV > max {
+			max = absV
+			idx = i
+		}
+		ix += incX
+	}
+	return idx
+}
+
+// Zaxpy adds alpha times x to y:
+//  y[i] += alpha * x[i] for all i
+func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) {
+	if incX == 0 {
+		panic(zeroIncX)
+	}
+	if incY == 0 {
+		panic(zeroIncY)
+	}
+	if n < 1 {
+		if n == 0 {
+			return
+		}
+		panic(negativeN)
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic(badX)
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic(badY)
+	}
+	if alpha == 0 {
+		return
+	}
+	if incX == 1 && incY == 1 {
+		c128.AxpyUnitary(alpha, x[:n], y[:n])
+		return
+	}
+	var ix, iy int
+	if incX < 0 {
+		ix = (1 - n) * incX
+	}
+	if incY < 0 {
+		iy = (1 - n) * incY
+	}
+	c128.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
+}
+
+// Zcopy copies the vector x to vector y.
+func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) {
+	if incX == 0 {
+		panic(zeroIncX)
+	}
+	if incY == 0 {
+		panic(zeroIncY)
+	}
+	if n < 1 {
+		if n == 0 {
+			return
+		}
+		panic(negativeN)
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic(badX)
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic(badY)
+	}
+	if incX == 1 && incY == 1 {
+		copy(y[:n], x[:n])
+		return
+	}
+	var ix, iy int
+	if incX < 0 {
+		ix = (-n + 1) * incX
+	}
+	if incY < 0 {
+		iy = (-n + 1) * incY
+	}
+	for i := 0; i < n; i++ {
+		y[iy] = x[ix]
+		ix += incX
+		iy += incY
+	}
+}
+
+// Zdotc computes the dot product
+//  x^H · y
+// of two complex vectors x and y.
+func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
+	if incX == 0 {
+		panic(zeroIncX)
+	}
+	if incY == 0 {
+		panic(zeroIncY)
+	}
+	if n <= 0 {
+		if n == 0 {
+			return 0
+		}
+		panic(negativeN)
+	}
+	if incX == 1 && incY == 1 {
+		if len(x) < n {
+			panic(badX)
+		}
+		if len(y) < n {
+			panic(badY)
+		}
+		return c128.DotcUnitary(x[:n], y[:n])
+	}
+	var ix, iy int
+	if incX < 0 {
+		ix = (-n + 1) * incX
+	}
+	if incY < 0 {
+		iy = (-n + 1) * incY
+	}
+	if ix >= len(x) || (n-1)*incX >= len(x) {
+		panic(badX)
+	}
+	if iy >= len(y) || (n-1)*incY >= len(y) {
+		panic(badY)
+	}
+	return c128.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
+}
+
+// Zdotu computes the dot product
+//  x^T · y
+// of two complex vectors x and y.
+func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
+	if incX == 0 {
+		panic(zeroIncX)
+	}
+	if incY == 0 {
+		panic(zeroIncY)
+	}
+	if n <= 0 {
+		if n == 0 {
+			return 0
+		}
+		panic(negativeN)
+	}
+	if incX == 1 && incY == 1 {
+		if len(x) < n {
+			panic(badX)
+		}
+		if len(y) < n {
+			panic(badY)
+		}
+		return c128.DotuUnitary(x[:n], y[:n])
+	}
+	var ix, iy int
+	if incX < 0 {
+		ix = (-n + 1) * incX
+	}
+	if incY < 0 {
+		iy = (-n + 1) * incY
+	}
+	if ix >= len(x) || (n-1)*incX >= len(x) {
+		panic(badX)
+	}
+	if iy >= len(y) || (n-1)*incY >= len(y) {
+		panic(badY)
+	}
+	return c128.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
+}
+
+// Zdscal scales the vector x by a real scalar alpha.
+// Zdscal has no effect if incX < 0.
+func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) {
+	if incX < 1 {
+		if incX == 0 {
+			panic(zeroIncX)
+		}
+		return
+	}
+	if (n-1)*incX >= len(x) {
+		panic(badX)
+	}
+	if n < 1 {
+		if n == 0 {
+			return
+		}
+		panic(negativeN)
+	}
+	if alpha == 0 {
+		if incX == 1 {
+			x = x[:n]
+			for i := range x {
+				x[i] = 0
+			}
+			return
+		}
+		for ix := 0; ix < n*incX; ix += incX {
+			x[ix] = 0
+		}
+		return
+	}
+	if incX == 1 {
+		x = x[:n]
+		for i, v := range x {
+			x[i] = complex(alpha*real(v), alpha*imag(v))
+		}
+		return
+	}
+	for ix := 0; ix < n*incX; ix += incX {
+		v := x[ix]
+		x[ix] = complex(alpha*real(v), alpha*imag(v))
+	}
+}
+
+// Zscal scales the vector x by a complex scalar alpha.
+// Zscal has no effect if incX < 0.
+func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) {
+	if incX < 1 {
+		if incX == 0 {
+			panic(zeroIncX)
+		}
+		return
+	}
+	if (n-1)*incX >= len(x) {
+		panic(badX)
+	}
+	if n < 1 {
+		if n == 0 {
+			return
+		}
+		panic(negativeN)
+	}
+	if alpha == 0 {
+		if incX == 1 {
+			x = x[:n]
+			for i := range x {
+				x[i] = 0
+			}
+			return
+		}
+		for ix := 0; ix < n*incX; ix += incX {
+			x[ix] = 0
+		}
+		return
+	}
+	if incX == 1 {
+		c128.ScalUnitary(alpha, x[:n])
+		return
+	}
+	c128.ScalInc(alpha, x, uintptr(n), uintptr(incX))
+}
+
+// Zswap exchanges the elements of two complex vectors x and y.
+func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) {
+	if incX == 0 {
+		panic(zeroIncX)
+	}
+	if incY == 0 {
+		panic(zeroIncY)
+	}
+	if n < 1 {
+		if n == 0 {
+			return
+		}
+		panic(negativeN)
+	}
+	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
+		panic(badX)
+	}
+	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
+		panic(badY)
+	}
+	if incX == 1 && incY == 1 {
+		x = x[:n]
+		for i, v := range x {
+			x[i], y[i] = y[i], v
+		}
+		return
+	}
+	var ix, iy int
+	if incX < 0 {
+		ix = (-n + 1) * incX
+	}
+	if incY < 0 {
+		iy = (-n + 1) * incY
+	}
+	for i := 0; i < n; i++ {
+		x[ix], y[iy] = y[iy], x[ix]
+		ix += incX
+		iy += incY
+	}
+}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level1double.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1double.go
@ -1,14 +1,14 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
 	"math"

-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f64"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f64"
 )

 var _ blas.Float64Level1 = Implementation{}
@ -270,13 +270,7 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
 		return
 	}
 	if incX == 1 && incY == 1 {
-		if len(x) < n {
-			panic(badLenX)
-		}
-		if len(y) < n {
-			panic(badLenY)
-		}
-		f64.AxpyUnitaryTo(y, alpha, x[:n], y)
+		f64.AxpyUnitary(alpha, x[:n], y[:n])
 		return
 	}
 	var ix, iy int
@ -286,12 +280,6 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
 	if incY < 0 {
 		iy = (-n + 1) * incY
 	}
-	if ix >= len(x) || ix+(n-1)*incX >= len(x) {
-		panic(badLenX)
-	}
-	if iy >= len(y) || iy+(n-1)*incY >= len(y) {
-		panic(badLenY)
-	}
 	f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
 }

@ -565,7 +553,6 @@ func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int,
 		ix += incX
 		iy += incY
 	}
-	return
 }

 // Dscal scales x by alpha.
@ -604,7 +591,5 @@ func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) {
 		f64.ScalUnitary(alpha, x[:n])
 		return
 	}
-	for ix := 0; ix < n*incX; ix += incX {
-		x[ix] *= alpha
-	}
+	f64.ScalInc(alpha, x, uintptr(n), uintptr(incX))
 }
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level1double_ddot.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1double_ddot.go
@ -1,11 +1,11 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/internal/asm/f64"
+	"gonum.org/v1/gonum/internal/asm/f64"
 )

 // Ddot computes the dot product of the two vectors
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level1single.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1single.go
@ -1,16 +1,16 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.

-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	math "github.com/gonum/blas/native/internal/math32"
+	math "gonum.org/v1/gonum/internal/math32"

-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f32"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f32"
 )

 var _ blas.Float32Level1 = Implementation{}
@ -284,13 +284,7 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
 		return
 	}
 	if incX == 1 && incY == 1 {
-		if len(x) < n {
-			panic(badLenX)
-		}
-		if len(y) < n {
-			panic(badLenY)
-		}
-		f32.AxpyUnitaryTo(y, alpha, x[:n], y)
+		f32.AxpyUnitary(alpha, x[:n], y[:n])
 		return
 	}
 	var ix, iy int
@ -300,12 +294,6 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
 	if incY < 0 {
 		iy = (-n + 1) * incY
 	}
-	if ix >= len(x) || ix+(n-1)*incX >= len(x) {
-		panic(badLenX)
-	}
-	if iy >= len(y) || iy+(n-1)*incY >= len(y) {
-		panic(badLenY)
-	}
 	f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
 }

@ -587,7 +575,6 @@ func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int,
 		ix += incX
 		iy += incY
 	}
-	return
 }

 // Sscal scales x by alpha.
@ -628,7 +615,5 @@ func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) {
 		f32.ScalUnitary(alpha, x[:n])
 		return
 	}
-	for ix := 0; ix < n*incX; ix += incX {
-		x[ix] *= alpha
-	}
+	f32.ScalInc(alpha, x, uintptr(n), uintptr(incX))
 }
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level1single_dsdot.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1single_dsdot.go
@ -1,13 +1,13 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.

-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/internal/asm/f32"
+	"gonum.org/v1/gonum/internal/asm/f32"
 )

 // Dsdot computes the dot product of the two vectors
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level1single_sdot.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1single_sdot.go
@ -1,13 +1,13 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.

-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/internal/asm/f32"
+	"gonum.org/v1/gonum/internal/asm/f32"
 )

 // Sdot computes the dot product of the two vectors
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level1single_sdsdot.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level1single_sdsdot.go
@ -1,13 +1,13 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.

-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/internal/asm/f32"
+	"gonum.org/v1/gonum/internal/asm/f32"
 )

 // Sdsdot computes the dot product of the two vectors plus a constant
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level2double.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level2double.go
@ -1,20 +1,20 @@
-// Copyright ©2014 The gonum Authors. All rights reserved.
+// Copyright ©2014 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f64"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f64"
 )

 var _ blas.Float64Level2 = Implementation{}

 // Dgemv computes
-//  y = alpha * a * x + beta * y if tA = blas.NoTrans
+//  y = alpha * A * x + beta * y    if tA = blas.NoTrans
 //  y = alpha * A^T * x + beta * y  if tA = blas.Trans or blas.ConjTrans
-// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
+// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
 func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
 	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
 		panic(badTranspose)
@ -167,21 +167,14 @@ func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []f
 		x = x[:m]
 		y = y[:n]
 		for i, xv := range x {
-			tmp := alpha * xv
-			if tmp != 0 {
-				atmp := a[i*lda : i*lda+n]
-				f64.AxpyUnitaryTo(atmp, tmp, y, atmp)
-			}
+			f64.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
 		}
 		return
 	}

 	ix := kx
 	for i := 0; i < m; i++ {
-		tmp := alpha * x[ix]
-		if tmp != 0 {
-			f64.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
-		}
+		f64.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
 		ix += incX
 	}
 }
@ -230,7 +223,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
 	if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
 		panic(badY)
 	}
-	if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
+	if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
 		panic(badLdA)
 	}

@ -269,7 +262,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
 	if tA == blas.NoTrans {
 		iy := ky
 		if incX == 1 {
-			for i := 0; i < m; i++ {
+			for i := 0; i < min(m, n+kL); i++ {
 				l := max(0, kL-i)
 				u := min(nCol, ld+kL-i)
 				off := max(0, i-kL)
@ -284,7 +277,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
 			}
 			return
 		}
-		for i := 0; i < m; i++ {
+		for i := 0; i < min(m, n+kL); i++ {
 			l := max(0, kL-i)
 			u := min(nCol, ld+kL-i)
 			off := max(0, i-kL)
@ -301,7 +294,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
 		return
 	}
 	if incX == 1 {
-		for i := 0; i < m; i++ {
+		for i := 0; i < min(m, n+kL); i++ {
 			l := max(0, kL-i)
 			u := min(nCol, ld+kL-i)
 			off := max(0, i-kL)
@ -316,7 +309,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
 		return
 	}
 	ix := kx
-	for i := 0; i < m; i++ {
+	for i := 0; i < min(m, n+kL); i++ {
 		l := max(0, kL-i)
 		u := min(nCol, ld+kL-i)
 		off := max(0, i-kL)
@ -1530,7 +1523,6 @@ func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64,
 		ix += incX
 		iy += incY
 	}
-	return
 }

 // Dsyr performs the rank-one update
@ -1718,7 +1710,6 @@ func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, inc
 		ix += incX
 		iy += incY
 	}
-	return
 }

 // Dtpsv solves
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level2single.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level2single.go
@ -1,22 +1,22 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.

-// Copyright ©2014 The gonum Authors. All rights reserved.
+// Copyright ©2014 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f32"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f32"
 )

 var _ blas.Float32Level2 = Implementation{}

 // Sgemv computes
-//  y = alpha * a * x + beta * y if tA = blas.NoTrans
+//  y = alpha * A * x + beta * y    if tA = blas.NoTrans
 //  y = alpha * A^T * x + beta * y  if tA = blas.Trans or blas.ConjTrans
-// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
+// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
 //
 // Float32 implementations are autogenerated and not directly tested.
 func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
@ -173,21 +173,14 @@ func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []f
 		x = x[:m]
 		y = y[:n]
 		for i, xv := range x {
-			tmp := alpha * xv
-			if tmp != 0 {
-				atmp := a[i*lda : i*lda+n]
-				f32.AxpyUnitaryTo(atmp, tmp, y, atmp)
-			}
+			f32.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
 		}
 		return
 	}

 	ix := kx
 	for i := 0; i < m; i++ {
-		tmp := alpha * x[ix]
-		if tmp != 0 {
-			f32.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
-		}
+		f32.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
 		ix += incX
 	}
 }
@ -238,7 +231,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
 	if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
 		panic(badY)
 	}
-	if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
+	if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
 		panic(badLdA)
 	}

@ -277,7 +270,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
 	if tA == blas.NoTrans {
 		iy := ky
 		if incX == 1 {
-			for i := 0; i < m; i++ {
+			for i := 0; i < min(m, n+kL); i++ {
 				l := max(0, kL-i)
 				u := min(nCol, ld+kL-i)
 				off := max(0, i-kL)
@ -292,7 +285,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
 			}
 			return
 		}
-		for i := 0; i < m; i++ {
+		for i := 0; i < min(m, n+kL); i++ {
 			l := max(0, kL-i)
 			u := min(nCol, ld+kL-i)
 			off := max(0, i-kL)
@ -309,7 +302,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
 		return
 	}
 	if incX == 1 {
-		for i := 0; i < m; i++ {
+		for i := 0; i < min(m, n+kL); i++ {
 			l := max(0, kL-i)
 			u := min(nCol, ld+kL-i)
 			off := max(0, i-kL)
@ -324,7 +317,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
 		return
 	}
 	ix := kx
-	for i := 0; i < m; i++ {
+	for i := 0; i < min(m, n+kL); i++ {
 		l := max(0, kL-i)
 		u := min(nCol, ld+kL-i)
 		off := max(0, i-kL)
@ -1552,7 +1545,6 @@ func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32,
 		ix += incX
 		iy += incY
 	}
-	return
 }

 // Ssyr performs the rank-one update
@ -1744,7 +1736,6 @@ func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, inc
 		ix += incX
 		iy += incY
 	}
-	return
 }

 // Stpsv solves
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level3double.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level3double.go
@ -1,12 +1,12 @@
-// Copyright ©2014 The gonum Authors. All rights reserved.
+// Copyright ©2014 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f64"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f64"
 )

 var _ blas.Float64Level3 = Implementation{}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/level3single.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/level3single.go
@ -1,14 +1,14 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.

-// Copyright ©2014 The gonum Authors. All rights reserved.
+// Copyright ©2014 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f32"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f32"
 )

 var _ blas.Float32Level3 = Implementation{}
--- a/vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go
+++ b/vendor/gonum.org/v1/gonum/blas/gonum/sgemm.go
@ -1,17 +1,17 @@
-// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
+// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.

-// Copyright ©2014 The gonum Authors. All rights reserved.
+// Copyright ©2014 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package native
+package gonum

 import (
 	"runtime"
 	"sync"

-	"github.com/gonum/blas"
-	"github.com/gonum/internal/asm/f32"
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/internal/asm/f32"
 )

 // Sgemm computes
@ -29,17 +29,17 @@ func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a
 	}
 	aTrans := tA == blas.Trans || tA == blas.ConjTrans
 	if aTrans {
-		checkMatrix32(k, m, a, lda)
+		checkSMatrix('a', k, m, a, lda)
 	} else {
-		checkMatrix32(m, k, a, lda)
+		checkSMatrix('a', m, k, a, lda)
 	}
 	bTrans := tB == blas.Trans || tB == blas.ConjTrans
 	if bTrans {
-		checkMatrix32(n, k, b, ldb)
+		checkSMatrix('b', n, k, b, ldb)
 	} else {
-		checkMatrix32(k, n, b, ldb)
+		checkSMatrix('b', k, n, b, ldb)
 	}
-	checkMatrix32(m, n, c, ldc)
+	checkSMatrix('c', m, n, c, ldc)

 	// scale c
 	if beta != 1 {
@ -125,7 +125,7 @@ func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []f
 		go func() {
 			defer wg.Done()
 			// Make local copies of otherwise global variables to reduce shared memory.
-			// This has a noticable effect on benchmarks in some cases.
+			// This has a noticeable effect on benchmarks in some cases.
 			alpha := alpha
 			aTrans := aTrans
 			bTrans := bTrans
@ -263,18 +263,3 @@ func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb i
 func sliceView32(a []float32, lda, i, j, r, c int) []float32 {
 	return a[i*lda+j : (i+r-1)*lda+j+c]
 }
-
-func checkMatrix32(m, n int, a []float32, lda int) {
-	if m < 0 {
-		panic("blas: rows < 0")
-	}
-	if n < 0 {
-		panic("blas: cols < 0")
-	}
-	if lda < n {
-		panic("blas: illegal stride")
-	}
-	if len(a) < (m-1)*lda+n {
-		panic("blas: insufficient matrix slice length")
-	}
-}
--- a/vendor/gonum.org/v1/gonum/floats/doc.go
+++ b/vendor/gonum.org/v1/gonum/floats/doc.go
@ -0,0 +1,11 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package floats provides a set of helper routines for dealing with slices
+// of float64. The functions avoid allocations to allow for use within tight
+// loops without garbage collection overhead.
+//
+// The convention used is that when a slice is being modified in place, it has
+// the name dst.
+package floats // import "gonum.org/v1/gonum/floats"
--- a/vendor/gonum.org/v1/gonum/floats/floats.go
+++ b/vendor/gonum.org/v1/gonum/floats/floats.go
@ -2,20 +2,15 @@
 // Use of this code is governed by a BSD-style
 // license that can be found in the LICENSE file

-// Package floats provides a set of helper routines for dealing with slices
-// of float64. The functions avoid allocations to allow for use within tight
-// loops without garbage collection overhead.
-//
-// The convention used is that when a slice is being modified in place, it has
-// the name dst.
 package floats

 import (
 	"errors"
 	"math"
 	"sort"
+	"strconv"

-	"github.com/gonum/internal/asm/f64"
+	"gonum.org/v1/gonum/internal/asm/f64"
 )

 // Add adds, element-wise, the elements of s and dst, and stores in dst.
@ -89,8 +84,8 @@ func (a argsort) Swap(i, j int) {
 	a.inds[i], a.inds[j] = a.inds[j], a.inds[i]
 }

-// Argsort sorts the elements of s while tracking their original order.
-// At the conclusion of Argsort, s will contain the original elements of s
+// Argsort sorts the elements of dst while tracking their original order.
+// At the conclusion of Argsort, dst will contain the original elements of dst
 // but sorted in increasing order, and inds will contain the original position
 // of the elements in the slice such that dst[i] = origDst[inds[i]].
 // It panics if the lengths of dst and inds do not match.
@ -342,7 +337,6 @@ func EqualLengths(slices ...[]float64) bool {
 // all of the found elements will be returned along with an error.
 // At the return of the function, the input inds will be in an undetermined state.
 func Find(inds []int, f func(float64) bool, s []float64, k int) ([]int, error) {
-
 	// inds is also returned to allow for calling with nil

 	// Reslice inds to have zero length
@ -495,6 +489,29 @@ func MulTo(dst, s, t []float64) []float64 {
 	return dst
 }

+const (
+	nanBits = 0x7ff8000000000000
+	nanMask = 0xfff8000000000000
+)
+
+// NaNWith returns an IEEE 754 "quiet not-a-number" value with the
+// payload specified in the low 51 bits of payload.
+// The NaN returned by math.NaN has a bit pattern equal to NaNWith(1).
+func NaNWith(payload uint64) float64 {
+	return math.Float64frombits(nanBits | (payload &^ nanMask))
+}
+
+// NaNPayload returns the lowest 51 bits payload of an IEEE 754 "quiet
+// not-a-number". For values of f other than quiet-NaN, NaNPayload
+// returns zero and false.
+func NaNPayload(f float64) (payload uint64, ok bool) {
+	b := math.Float64bits(f)
+	if b&nanBits != nanBits {
+		return 0, false
+	}
+	return b &^ nanMask, true
+}
+
 // Nearest returns the index of the element in s
 // whose value is nearest to v.  If several such
 // elements exist, the lowest index is returned.
@ -567,6 +584,19 @@ func Norm(s []float64, L float64) float64 {
 	return math.Pow(norm, 1/L)
 }

+// ParseWithNA converts the string s to a float64 in v.
+// If s equals missing, w is returned as 0, otherwise 1.
+func ParseWithNA(s, missing string) (v, w float64, err error) {
+	if s == missing {
+		return 0, 0, nil
+	}
+	v, err = strconv.ParseFloat(s, 64)
+	if err == nil {
+		w = 1
+	}
+	return v, w, err
+}
+
 // Prod returns the product of the elements of the slice.
 // Returns 1 if len(s) = 0.
 func Prod(s []float64) float64 {
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyinc_amd64.s
@ -0,0 +1,134 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+// MOVDDUP X2, X3
+#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
+// MOVDDUP X4, X5
+#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
+// MOVDDUP X6, X7
+#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
+// MOVDDUP X8, X9
+#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
+
+// ADDSUBPD X2, X3
+#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
+// ADDSUBPD X4, X5
+#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
+// ADDSUBPD X6, X7
+#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
+// ADDSUBPD X8, X9
+#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
+
+// func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
+TEXT ·AxpyInc(SB), NOSPLIT, $0
+	MOVQ   x_base+16(FP), SI // SI = &x
+	MOVQ   y_base+40(FP), DI // DI = &y
+	MOVQ   n+64(FP), CX      // CX = n
+	CMPQ   CX, $0            // if n==0 { return }
+	JE     axpyi_end
+	MOVQ   ix+88(FP), R8     // R8 = ix  // Load the first index
+	SHLQ   $4, R8            // R8 *= sizeof(complex128)
+	MOVQ   iy+96(FP), R9     // R9 = iy
+	SHLQ   $4, R9            // R9 *= sizeof(complex128)
+	LEAQ   (SI)(R8*1), SI    // SI = &(x[ix])
+	LEAQ   (DI)(R9*1), DI    // DI = &(y[iy])
+	MOVQ   DI, DX            // DX = DI      // Separate Read/Write pointers
+	MOVQ   incX+72(FP), R8   // R8 = incX
+	SHLQ   $4, R8            // R8 *= sizeof(complex128)
+	MOVQ   incY+80(FP), R9   // R9 = iy
+	SHLQ   $4, R9            // R9 *= sizeof(complex128)
+	MOVUPS alpha+0(FP), X0   // X0 = { imag(a), real(a) }
+	MOVAPS X0, X1
+	SHUFPD $0x1, X1, X1      // X1 = { real(a), imag(a) }
+	MOVAPS X0, X10           // Copy X0 and X1 for pipelining
+	MOVAPS X1, X11
+	MOVQ   CX, BX
+	ANDQ   $3, CX            // CX = n % 4
+	SHRQ   $2, BX            // BX = floor( n / 4 )
+	JZ     axpyi_tail        // if BX == 0 { goto axpyi_tail }
+
+axpyi_loop: // do {
+	MOVUPS (SI), X2       // X_i = { imag(x[i]), real(x[i]) }
+	MOVUPS (SI)(R8*1), X4
+	LEAQ   (SI)(R8*2), SI // SI = &(SI[incX*2])
+	MOVUPS (SI), X6
+	MOVUPS (SI)(R8*1), X8
+
+	// X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_X2_X3
+	MOVDDUP_X4_X5
+	MOVDDUP_X6_X7
+	MOVDDUP_X8_X9
+
+	// X_i = { imag(x[i]), imag(x[i]) }
+	SHUFPD $0x3, X2, X2
+	SHUFPD $0x3, X4, X4
+	SHUFPD $0x3, X6, X6
+	SHUFPD $0x3, X8, X8
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X1, X2
+	MULPD X0, X3
+	MULPD X11, X4
+	MULPD X10, X5
+	MULPD X1, X6
+	MULPD X0, X7
+	MULPD X11, X8
+	MULPD X10, X9
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DX), X3
+	ADDPD  (DX)(R9*1), X5
+	LEAQ   (DX)(R9*2), DX // DX = &(DX[incY*2])
+	ADDPD  (DX), X7
+	ADDPD  (DX)(R9*1), X9
+	MOVUPS X3, (DI)       // dst[i] = X_(i+1)
+	MOVUPS X5, (DI)(R9*1)
+	LEAQ   (DI)(R9*2), DI
+	MOVUPS X7, (DI)
+	MOVUPS X9, (DI)(R9*1)
+	LEAQ   (SI)(R8*2), SI // SI = &(SI[incX*2])
+	LEAQ   (DX)(R9*2), DX // DX = &(DX[incY*2])
+	LEAQ   (DI)(R9*2), DI // DI = &(DI[incY*2])
+	DECQ   BX
+	JNZ    axpyi_loop     // } while --BX > 0
+	CMPQ   CX, $0         // if CX == 0 { return }
+	JE     axpyi_end
+
+axpyi_tail: // do {
+	MOVUPS (SI), X2     // X_i = { imag(x[i]), real(x[i]) }
+	MOVDDUP_X2_X3       // X_(i+1) = { real(x[i], real(x[i]) }
+	SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
+	MULPD  X1, X2       // X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD  X0, X3       // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DI), X3
+	MOVUPS X3, (DI)   // y[i] = X_i
+	ADDQ   R8, SI     // SI = &(SI[incX])
+	ADDQ   R9, DI     // DI = &(DI[incY])
+	LOOP   axpyi_tail // } while --CX > 0
+
+axpyi_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyincto_amd64.s
@ -0,0 +1,141 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+// MOVDDUP X2, X3
+#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
+// MOVDDUP X4, X5
+#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
+// MOVDDUP X6, X7
+#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
+// MOVDDUP X8, X9
+#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
+
+// ADDSUBPD X2, X3
+#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
+// ADDSUBPD X4, X5
+#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
+// ADDSUBPD X6, X7
+#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
+// ADDSUBPD X8, X9
+#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
+
+// func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
+TEXT ·AxpyIncTo(SB), NOSPLIT, $0
+	MOVQ   dst_base+0(FP), DI // DI = &dst
+	MOVQ   x_base+56(FP), SI  // SI = &x
+	MOVQ   y_base+80(FP), DX  // DX = &y
+	MOVQ   n+104(FP), CX      // CX = n
+	CMPQ   CX, $0             // if n==0 { return }
+	JE     axpyi_end
+	MOVQ   ix+128(FP), R8     // R8 = ix  // Load the first index
+	SHLQ   $4, R8             // R8 *= sizeof(complex128)
+	MOVQ   iy+136(FP), R9     // R9 = iy
+	SHLQ   $4, R9             // R9 *= sizeof(complex128)
+	MOVQ   idst+32(FP), R10   // R10 = idst
+	SHLQ   $4, R10            // R10 *= sizeof(complex128)
+	LEAQ   (SI)(R8*1), SI     // SI = &(x[ix])
+	LEAQ   (DX)(R9*1), DX     // DX = &(y[iy])
+	LEAQ   (DI)(R10*1), DI    // DI = &(dst[idst])
+	MOVQ   incX+112(FP), R8   // R8 = incX
+	SHLQ   $4, R8             // R8 *= sizeof(complex128)
+	MOVQ   incY+120(FP), R9   // R9 = incY
+	SHLQ   $4, R9             // R9 *= sizeof(complex128)
+	MOVQ   incDst+24(FP), R10 // R10 = incDst
+	SHLQ   $4, R10            // R10 *= sizeof(complex128)
+	MOVUPS alpha+40(FP), X0   // X0 = { imag(a), real(a) }
+	MOVAPS X0, X1
+	SHUFPD $0x1, X1, X1       // X1 = { real(a), imag(a) }
+	MOVAPS X0, X10            // Copy X0 and X1 for pipelining
+	MOVAPS X1, X11
+	MOVQ   CX, BX
+	ANDQ   $3, CX             // CX = n % 4
+	SHRQ   $2, BX             // BX = floor( n / 4 )
+	JZ     axpyi_tail         // if BX == 0 { goto axpyi_tail }
+
+axpyi_loop: // do {
+	MOVUPS (SI), X2       // X_i = { imag(x[i]), real(x[i]) }
+	MOVUPS (SI)(R8*1), X4
+	LEAQ   (SI)(R8*2), SI // SI = &(SI[incX*2])
+
+	MOVUPS (SI), X6
+	MOVUPS (SI)(R8*1), X8
+
+	// X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_X2_X3
+	MOVDDUP_X4_X5
+	MOVDDUP_X6_X7
+	MOVDDUP_X8_X9
+
+	// X_i = { imag(x[i]), imag(x[i]) }
+	SHUFPD $0x3, X2, X2
+	SHUFPD $0x3, X4, X4
+	SHUFPD $0x3, X6, X6
+	SHUFPD $0x3, X8, X8
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X1, X2
+	MULPD X0, X3
+	MULPD X11, X4
+	MULPD X10, X5
+	MULPD X1, X6
+	MULPD X0, X7
+	MULPD X11, X8
+	MULPD X10, X9
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DX), X3
+	ADDPD  (DX)(R9*1), X5
+	LEAQ   (DX)(R9*2), DX  // DX = &(DX[incY*2])
+	ADDPD  (DX), X7
+	ADDPD  (DX)(R9*1), X9
+	MOVUPS X3, (DI)        // dst[i] = X_(i+1)
+	MOVUPS X5, (DI)(R10*1)
+	LEAQ   (DI)(R10*2), DI
+	MOVUPS X7, (DI)
+	MOVUPS X9, (DI)(R10*1)
+	LEAQ   (SI)(R8*2), SI  // SI = &(SI[incX*2])
+	LEAQ   (DX)(R9*2), DX  // DX = &(DX[incY*2])
+	LEAQ   (DI)(R10*2), DI // DI = &(DI[incDst*2])
+	DECQ   BX
+	JNZ    axpyi_loop      // } while --BX > 0
+	CMPQ   CX, $0          // if CX == 0 { return }
+	JE     axpyi_end
+
+axpyi_tail: // do {
+	MOVUPS (SI), X2     // X_i = { imag(x[i]), real(x[i]) }
+	MOVDDUP_X2_X3       // X_(i+1) = { real(x[i], real(x[i]) }
+	SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
+	MULPD  X1, X2       // X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD  X0, X3       // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DX), X3
+	MOVUPS X3, (DI)   // y[i] X_(i+1)
+	ADDQ   R8, SI     // SI += incX
+	ADDQ   R9, DX     // DX += incY
+	ADDQ   R10, DI    // DI += incDst
+	LOOP   axpyi_tail // } while --CX > 0
+
+axpyi_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitary_amd64.s
@ -0,0 +1,122 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+// MOVDDUP X2, X3
+#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
+// MOVDDUP X4, X5
+#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
+// MOVDDUP X6, X7
+#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
+// MOVDDUP X8, X9
+#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
+
+// ADDSUBPD X2, X3
+#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
+// ADDSUBPD X4, X5
+#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
+// ADDSUBPD X6, X7
+#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
+// ADDSUBPD X8, X9
+#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
+
+// func AxpyUnitary(alpha complex128, x, y []complex128)
+TEXT ·AxpyUnitary(SB), NOSPLIT, $0
+	MOVQ    x_base+16(FP), SI // SI = &x
+	MOVQ    y_base+40(FP), DI // DI = &y
+	MOVQ    x_len+24(FP), CX  // CX = min( len(x), len(y) )
+	CMPQ    y_len+48(FP), CX
+	CMOVQLE y_len+48(FP), CX
+	CMPQ    CX, $0            // if CX == 0 { return }
+	JE      caxy_end
+	PXOR    X0, X0            // Clear work registers and cache-align loop
+	PXOR    X1, X1
+	MOVUPS  alpha+0(FP), X0   // X0 = { imag(a), real(a) }
+	MOVAPS  X0, X1
+	SHUFPD  $0x1, X1, X1      // X1 = { real(a), imag(a) }
+	XORQ    AX, AX            // i = 0
+	MOVAPS  X0, X10           // Copy X0 and X1 for pipelining
+	MOVAPS  X1, X11
+	MOVQ    CX, BX
+	ANDQ    $3, CX            // CX = n % 4
+	SHRQ    $2, BX            // BX = floor( n / 4 )
+	JZ      caxy_tail         // if BX == 0 { goto caxy_tail }
+
+caxy_loop: // do {
+	MOVUPS (SI)(AX*8), X2   // X_i = { imag(x[i]), real(x[i]) }
+	MOVUPS 16(SI)(AX*8), X4
+	MOVUPS 32(SI)(AX*8), X6
+	MOVUPS 48(SI)(AX*8), X8
+
+	// X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_X2_X3
+	MOVDDUP_X4_X5
+	MOVDDUP_X6_X7
+	MOVDDUP_X8_X9
+
+	// X_i = { imag(x[i]), imag(x[i]) }
+	SHUFPD $0x3, X2, X2
+	SHUFPD $0x3, X4, X4
+	SHUFPD $0x3, X6, X6
+	SHUFPD $0x3, X8, X8
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X1, X2
+	MULPD X0, X3
+	MULPD X11, X4
+	MULPD X10, X5
+	MULPD X1, X6
+	MULPD X0, X7
+	MULPD X11, X8
+	MULPD X10, X9
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DI)(AX*8), X3
+	ADDPD  16(DI)(AX*8), X5
+	ADDPD  32(DI)(AX*8), X7
+	ADDPD  48(DI)(AX*8), X9
+	MOVUPS X3, (DI)(AX*8)   // y[i] = X_(i+1)
+	MOVUPS X5, 16(DI)(AX*8)
+	MOVUPS X7, 32(DI)(AX*8)
+	MOVUPS X9, 48(DI)(AX*8)
+	ADDQ   $8, AX           // i += 8
+	DECQ   BX
+	JNZ    caxy_loop        // } while --BX > 0
+	CMPQ   CX, $0           // if CX == 0 { return }
+	JE     caxy_end
+
+caxy_tail: // do {
+	MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
+	MOVDDUP_X2_X3         // X_(i+1) = { real(x[i], real(x[i]) }
+	SHUFPD $0x3, X2, X2   // X_i = { imag(x[i]), imag(x[i]) }
+	MULPD  X1, X2         // X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD  X0, X3         // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DI)(AX*8), X3
+	MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
+	ADDQ   $2, AX         // i += 2
+	LOOP   caxy_tail      // }  while --CX > 0
+
+caxy_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
@ -0,0 +1,123 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+// MOVDDUP X2, X3
+#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
+// MOVDDUP X4, X5
+#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
+// MOVDDUP X6, X7
+#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
+// MOVDDUP X8, X9
+#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
+
+// ADDSUBPD X2, X3
+#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
+// ADDSUBPD X4, X5
+#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
+// ADDSUBPD X6, X7
+#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
+// ADDSUBPD X8, X9
+#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
+
+// func AxpyUnitaryTo(dst []complex128, alpha complex64, x, y []complex128)
+TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0
+	MOVQ    dst_base+0(FP), DI // DI = &dst
+	MOVQ    x_base+40(FP), SI  // SI = &x
+	MOVQ    y_base+64(FP), DX  // DX = &y
+	MOVQ    x_len+48(FP), CX   // CX = min( len(x), len(y), len(dst) )
+	CMPQ    y_len+72(FP), CX
+	CMOVQLE y_len+72(FP), CX
+	CMPQ    dst_len+8(FP), CX
+	CMOVQLE dst_len+8(FP), CX
+	CMPQ    CX, $0             // if CX == 0 { return }
+	JE      caxy_end
+	MOVUPS  alpha+24(FP), X0   // X0 = { imag(a), real(a) }
+	MOVAPS  X0, X1
+	SHUFPD  $0x1, X1, X1       // X1 = { real(a), imag(a) }
+	XORQ    AX, AX             // i = 0
+	MOVAPS  X0, X10            // Copy X0 and X1 for pipelining
+	MOVAPS  X1, X11
+	MOVQ    CX, BX
+	ANDQ    $3, CX             // CX = n % 4
+	SHRQ    $2, BX             // BX = floor( n / 4 )
+	JZ      caxy_tail          // if BX == 0 { goto caxy_tail }
+
+caxy_loop: // do {
+	MOVUPS (SI)(AX*8), X2   // X_i = { imag(x[i]), real(x[i]) }
+	MOVUPS 16(SI)(AX*8), X4
+	MOVUPS 32(SI)(AX*8), X6
+	MOVUPS 48(SI)(AX*8), X8
+
+	// X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_X2_X3 // Load and duplicate imag elements (xi, xi)
+	MOVDDUP_X4_X5
+	MOVDDUP_X6_X7
+	MOVDDUP_X8_X9
+
+	// X_i = { imag(x[i]), imag(x[i]) }
+	SHUFPD $0x3, X2, X2 // duplicate real elements (xr, xr)
+	SHUFPD $0x3, X4, X4
+	SHUFPD $0x3, X6, X6
+	SHUFPD $0x3, X8, X8
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X1, X2
+	MULPD X0, X3
+	MULPD X11, X4
+	MULPD X10, X5
+	MULPD X1, X6
+	MULPD X0, X7
+	MULPD X11, X8
+	MULPD X10, X9
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DX)(AX*8), X3
+	ADDPD  16(DX)(AX*8), X5
+	ADDPD  32(DX)(AX*8), X7
+	ADDPD  48(DX)(AX*8), X9
+	MOVUPS X3, (DI)(AX*8)   // y[i] = X_(i+1)
+	MOVUPS X5, 16(DI)(AX*8)
+	MOVUPS X7, 32(DI)(AX*8)
+	MOVUPS X9, 48(DI)(AX*8)
+	ADDQ   $8, AX           // i += 8
+	DECQ   BX
+	JNZ    caxy_loop        // } while --BX > 0
+	CMPQ   CX, $0           // if CX == 0 { return }
+	JE     caxy_end
+
+caxy_tail: // Same calculation, but read in values to avoid trampling memory
+	MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
+	MOVDDUP_X2_X3         // X_(i+1) = { real(x[i], real(x[i]) }
+	SHUFPD $0x3, X2, X2   // X_i = { imag(x[i]), imag(x[i]) }
+	MULPD  X1, X2         // X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD  X0, X3         // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+
+	// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
+	ADDPD  (DX)(AX*8), X3
+	MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
+	ADDQ   $2, AX         // i += 2
+	LOOP   caxy_tail      // }  while --CX > 0
+
+caxy_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go
@ -0,0 +1,6 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package c128 provides complex128 vector primitives.
+package c128 // import "gonum.org/v1/gonum/internal/asm/c128"
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcinc_amd64.s
@ -0,0 +1,153 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define MOVDDUP_XPTR__X3    LONG $0x1E120FF2 // MOVDDUP (SI), X3
+#define MOVDDUP_XPTR_INCX__X5    LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
+#define MOVDDUP_XPTR_INCX_2__X7    LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
+#define MOVDDUP_XPTR_INCx3X__X9    LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
+
+#define MOVDDUP_8_XPTR__X2    LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
+#define MOVDDUP_8_XPTR_INCX__X4    LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
+#define MOVDDUP_8_XPTR_INCX_2__X6    LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
+#define MOVDDUP_8_XPTR_INCx3X__X8    LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
+
+#define ADDSUBPD_X2_X3    LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5    LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7    LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9    LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define SUM X0
+#define P_SUM X1
+#define INC_X R8
+#define INCx3_X R9
+#define INC_Y R10
+#define INCx3_Y R11
+#define NEG1 X15
+#define P_NEG1 X14
+
+// func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
+TEXT ·DotcInc(SB), NOSPLIT, $0
+	MOVQ   x_base+0(FP), X_PTR       // X_PTR = &x
+	MOVQ   y_base+24(FP), Y_PTR      // Y_PTR = &y
+	MOVQ   n+48(FP), LEN             // LEN = n
+	PXOR   SUM, SUM                  // SUM = 0
+	CMPQ   LEN, $0                   // if LEN == 0 { return }
+	JE     dot_end
+	PXOR   P_SUM, P_SUM              // P_SUM = 0
+	MOVQ   ix+72(FP), INC_X          // INC_X = ix * sizeof(complex128)
+	SHLQ   $4, INC_X
+	MOVQ   iy+80(FP), INC_Y          // INC_Y = iy * sizeof(complex128)
+	SHLQ   $4, INC_Y
+	LEAQ   (X_PTR)(INC_X*1), X_PTR   // X_PTR = &(X_PTR[ix])
+	LEAQ   (Y_PTR)(INC_Y*1), Y_PTR   // Y_PTR = &(Y_PTR[iy])
+	MOVQ   incX+56(FP), INC_X        // INC_X = incX
+	SHLQ   $4, INC_X                 // INC_X *=  sizeof(complex128)
+	MOVQ   incY+64(FP), INC_Y        // INC_Y = incY
+	SHLQ   $4, INC_Y                 // INC_Y *=  sizeof(complex128)
+	MOVSD  $(-1.0), NEG1
+	SHUFPD $0, NEG1, NEG1            // { -1, -1 }
+	MOVQ   LEN, TAIL
+	ANDQ   $3, TAIL                  // TAIL = n % 4
+	SHRQ   $2, LEN                   // LEN = floor( n / 4 )
+	JZ     dot_tail                  // if n <= 4 { goto dot_tail }
+	MOVAPS NEG1, P_NEG1              // Copy NEG1 to P_NEG1 for pipelining
+	LEAQ   (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
+	LEAQ   (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
+
+dot_loop: // do {
+	MOVDDUP_XPTR__X3        // X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_XPTR_INCX__X5
+	MOVDDUP_XPTR_INCX_2__X7
+	MOVDDUP_XPTR_INCx3X__X9
+
+	MOVDDUP_8_XPTR__X2        // X_i = { imag(x[i]), imag(x[i]) }
+	MOVDDUP_8_XPTR_INCX__X4
+	MOVDDUP_8_XPTR_INCX_2__X6
+	MOVDDUP_8_XPTR_INCx3X__X8
+
+	// X_i = { -imag(x[i]), -imag(x[i]) }
+	MULPD NEG1, X2
+	MULPD P_NEG1, X4
+	MULPD NEG1, X6
+	MULPD P_NEG1, X8
+
+	// X_j = { imag(y[i]), real(y[i]) }
+	MOVUPS (Y_PTR), X10
+	MOVUPS (Y_PTR)(INC_Y*1), X11
+	MOVUPS (Y_PTR)(INC_Y*2), X12
+	MOVUPS (Y_PTR)(INCx3_Y*1), X13
+
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X10, X3
+	MULPD X11, X5
+	MULPD X12, X7
+	MULPD X13, X9
+
+	// X_j     = { real(y[i]), imag(y[i]) }
+	SHUFPD $0x1, X10, X10
+	SHUFPD $0x1, X11, X11
+	SHUFPD $0x1, X12, X12
+	SHUFPD $0x1, X13, X13
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD X10, X2
+	MULPD X11, X4
+	MULPD X12, X6
+	MULPD X13, X8
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// psum += result[i]
+	ADDPD X3, SUM
+	ADDPD X5, P_SUM
+	ADDPD X7, SUM
+	ADDPD X9, P_SUM
+
+	LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
+	LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
+
+	DECQ  LEN
+	JNZ   dot_loop   // } while --LEN > 0
+	ADDPD P_SUM, SUM // sum += psum
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail: // do {
+	MOVDDUP_XPTR__X3      // X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_8_XPTR__X2    // X_i = { imag(x[i]), imag(x[i]) }
+	MULPD  NEG1, X2       // X_i     = { -imag(x[i])          , -imag(x[i])           }
+	MOVUPS (Y_PTR), X10   // X_j     = {  imag(y[i])          ,  real(y[i])           }
+	MULPD  X10, X3        // X_(i+1) = {  imag(a) * real(x[i]),  real(a) * real(x[i]) }
+	SHUFPD $0x1, X10, X10 // X_j     = {  real(y[i])          ,  imag(y[i])           }
+	MULPD  X10, X2        // X_i     = {  real(a) * imag(x[i]),  imag(a) * imag(x[i]) }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDPD X3, SUM      // sum += result[i]
+	ADDQ  INC_X, X_PTR // X_PTR += incX
+	ADDQ  INC_Y, Y_PTR // Y_PTR += incY
+	DECQ  TAIL
+	JNZ   dot_tail     // }  while --TAIL > 0
+
+dot_end:
+	MOVUPS SUM, sum+88(FP)
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotcunitary_amd64.s
@ -0,0 +1,143 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define MOVDDUP_XPTR_IDX_8__X3    LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
+#define MOVDDUP_16_XPTR_IDX_8__X5    LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
+#define MOVDDUP_32_XPTR_IDX_8__X7    LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
+#define MOVDDUP_48_XPTR_IDX_8__X9    LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
+
+#define MOVDDUP_XPTR_IIDX_8__X2    LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
+#define MOVDDUP_16_XPTR_IIDX_8__X4    LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
+#define MOVDDUP_32_XPTR_IIDX_8__X6    LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
+#define MOVDDUP_48_XPTR_IIDX_8__X8    LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
+
+#define ADDSUBPD_X2_X3    LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5    LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7    LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9    LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define SUM X0
+#define P_SUM X1
+#define IDX AX
+#define I_IDX DX
+#define NEG1 X15
+#define P_NEG1 X14
+
+// func DotcUnitary(x, y []complex128) (sum complex128)
+TEXT ·DotcUnitary(SB), NOSPLIT, $0
+	MOVQ    x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ    y_base+24(FP), Y_PTR // Y_PTR = &y
+	MOVQ    x_len+8(FP), LEN     // LEN = min( len(x), len(y) )
+	CMPQ    y_len+32(FP), LEN
+	CMOVQLE y_len+32(FP), LEN
+	PXOR    SUM, SUM             // sum = 0
+	CMPQ    LEN, $0              // if LEN == 0 { return }
+	JE      dot_end
+	XORPS   P_SUM, P_SUM         // psum = 0
+	MOVSD   $(-1.0), NEG1
+	SHUFPD  $0, NEG1, NEG1       // { -1, -1 }
+	XORQ    IDX, IDX             // i := 0
+	MOVQ    $1, I_IDX            // j := 1
+	MOVQ    LEN, TAIL
+	ANDQ    $3, TAIL             // TAIL = floor( TAIL / 4 )
+	SHRQ    $2, LEN              // LEN = TAIL % 4
+	JZ      dot_tail             // if LEN == 0 { goto dot_tail }
+
+	MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
+
+dot_loop: // do {
+	MOVDDUP_XPTR_IDX_8__X3    // X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_16_XPTR_IDX_8__X5
+	MOVDDUP_32_XPTR_IDX_8__X7
+	MOVDDUP_48_XPTR_IDX_8__X9
+
+	MOVDDUP_XPTR_IIDX_8__X2    // X_i = { imag(x[i]), imag(x[i]) }
+	MOVDDUP_16_XPTR_IIDX_8__X4
+	MOVDDUP_32_XPTR_IIDX_8__X6
+	MOVDDUP_48_XPTR_IIDX_8__X8
+
+	// X_i = { -imag(x[i]), -imag(x[i]) }
+	MULPD NEG1, X2
+	MULPD P_NEG1, X4
+	MULPD NEG1, X6
+	MULPD P_NEG1, X8
+
+	// X_j = { imag(y[i]), real(y[i]) }
+	MOVUPS (Y_PTR)(IDX*8), X10
+	MOVUPS 16(Y_PTR)(IDX*8), X11
+	MOVUPS 32(Y_PTR)(IDX*8), X12
+	MOVUPS 48(Y_PTR)(IDX*8), X13
+
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X10, X3
+	MULPD X11, X5
+	MULPD X12, X7
+	MULPD X13, X9
+
+	// X_j = { real(y[i]), imag(y[i]) }
+	SHUFPD $0x1, X10, X10
+	SHUFPD $0x1, X11, X11
+	SHUFPD $0x1, X12, X12
+	SHUFPD $0x1, X13, X13
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD X10, X2
+	MULPD X11, X4
+	MULPD X12, X6
+	MULPD X13, X8
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// psum += result[i]
+	ADDPD X3, SUM
+	ADDPD X5, P_SUM
+	ADDPD X7, SUM
+	ADDPD X9, P_SUM
+
+	ADDQ  $8, IDX    // IDX += 8
+	ADDQ  $8, I_IDX  // I_IDX += 8
+	DECQ  LEN
+	JNZ   dot_loop   // } while --LEN > 0
+	ADDPD P_SUM, SUM // sum += psum
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail: // do {
+	MOVDDUP_XPTR_IDX_8__X3     // X_(i+1) = {  real(x[i])          ,  real(x[i])           }
+	MOVDDUP_XPTR_IIDX_8__X2    // X_i     = {  imag(x[i])          ,  imag(x[i])           }
+	MULPD  NEG1, X2            // X_i     = { -imag(x[i])          , -imag(x[i])           }
+	MOVUPS (Y_PTR)(IDX*8), X10 // X_j     = {  imag(y[i])          ,  real(y[i])           }
+	MULPD  X10, X3             // X_(i+1) = {  imag(a) * real(x[i]),  real(a) * real(x[i]) }
+	SHUFPD $0x1, X10, X10      // X_j     = {  real(y[i])          ,  imag(y[i])           }
+	MULPD  X10, X2             // X_i     = {  real(a) * imag(x[i]),  imag(a) * imag(x[i]) }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDPD X3, SUM   // SUM += result[i]
+	ADDQ  $2, IDX   // IDX += 2
+	ADDQ  $2, I_IDX // I_IDX += 2
+	DECQ  TAIL
+	JNZ   dot_tail  // }  while --TAIL > 0
+
+dot_end:
+	MOVUPS SUM, sum+48(FP)
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuinc_amd64.s
@ -0,0 +1,141 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define MOVDDUP_XPTR__X3    LONG $0x1E120FF2 // MOVDDUP (SI), X3
+#define MOVDDUP_XPTR_INCX__X5    LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
+#define MOVDDUP_XPTR_INCX_2__X7    LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
+#define MOVDDUP_XPTR_INCx3X__X9    LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
+
+#define MOVDDUP_8_XPTR__X2    LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
+#define MOVDDUP_8_XPTR_INCX__X4    LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
+#define MOVDDUP_8_XPTR_INCX_2__X6    LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
+#define MOVDDUP_8_XPTR_INCx3X__X8    LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
+
+#define ADDSUBPD_X2_X3    LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5    LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7    LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9    LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define SUM X0
+#define P_SUM X1
+#define INC_X R8
+#define INCx3_X R9
+#define INC_Y R10
+#define INCx3_Y R11
+
+// func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
+TEXT ·DotuInc(SB), NOSPLIT, $0
+	MOVQ x_base+0(FP), X_PTR       // X_PTR = &x
+	MOVQ y_base+24(FP), Y_PTR      // Y_PTR = &y
+	MOVQ n+48(FP), LEN             // LEN = n
+	PXOR SUM, SUM                  // sum = 0
+	CMPQ LEN, $0                   // if LEN == 0 { return }
+	JE   dot_end
+	MOVQ ix+72(FP), INC_X          // INC_X = ix * sizeof(complex128)
+	SHLQ $4, INC_X
+	MOVQ iy+80(FP), INC_Y          // INC_Y = iy * sizeof(complex128)
+	SHLQ $4, INC_Y
+	LEAQ (X_PTR)(INC_X*1), X_PTR   // X_PTR = &(X_PTR[ix])
+	LEAQ (Y_PTR)(INC_Y*1), Y_PTR   // Y_PTR = &(Y_PTR[iy])
+	MOVQ incX+56(FP), INC_X        // INC_X = incX
+	SHLQ $4, INC_X                 // INC_X *=  sizeof(complex128)
+	MOVQ incY+64(FP), INC_Y        // INC_Y = incY
+	SHLQ $4, INC_Y                 // INC_Y *=  sizeof(complex128)
+	MOVQ LEN, TAIL
+	ANDQ $3, TAIL                  // LEN = LEN % 4
+	SHRQ $2, LEN                   // LEN = floor( LEN / 4 )
+	JZ   dot_tail                  // if LEN <= 4 { goto dot_tail }
+	PXOR P_SUM, P_SUM              // psum = 0
+	LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
+	LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
+
+dot_loop: // do {
+	MOVDDUP_XPTR__X3        // X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_XPTR_INCX__X5
+	MOVDDUP_XPTR_INCX_2__X7
+	MOVDDUP_XPTR_INCx3X__X9
+
+	MOVDDUP_8_XPTR__X2        // X_i = { imag(x[i]), imag(x[i]) }
+	MOVDDUP_8_XPTR_INCX__X4
+	MOVDDUP_8_XPTR_INCX_2__X6
+	MOVDDUP_8_XPTR_INCx3X__X8
+
+	// X_j = { imag(y[i]), real(y[i]) }
+	MOVUPS (Y_PTR), X10
+	MOVUPS (Y_PTR)(INC_Y*1), X11
+	MOVUPS (Y_PTR)(INC_Y*2), X12
+	MOVUPS (Y_PTR)(INCx3_Y*1), X13
+
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X10, X3
+	MULPD X11, X5
+	MULPD X12, X7
+	MULPD X13, X9
+
+	// X_j     = { real(y[i]), imag(y[i]) }
+	SHUFPD $0x1, X10, X10
+	SHUFPD $0x1, X11, X11
+	SHUFPD $0x1, X12, X12
+	SHUFPD $0x1, X13, X13
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD X10, X2
+	MULPD X11, X4
+	MULPD X12, X6
+	MULPD X13, X8
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// psum += result[i]
+	ADDPD X3, SUM
+	ADDPD X5, P_SUM
+	ADDPD X7, SUM
+	ADDPD X9, P_SUM
+
+	LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
+	LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
+
+	DECQ  LEN
+	JNZ   dot_loop   // } while --BX > 0
+	ADDPD P_SUM, SUM // sum += psum
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail: // do {
+	MOVDDUP_XPTR__X3      // X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_8_XPTR__X2    // X_i = { imag(x[i]), imag(x[i]) }
+	MOVUPS (Y_PTR), X10   // X_j     = {  imag(y[i])          ,  real(y[i])           }
+	MULPD  X10, X3        // X_(i+1) = {  imag(a) * real(x[i]),  real(a) * real(x[i]) }
+	SHUFPD $0x1, X10, X10 // X_j     = {  real(y[i])          ,  imag(y[i])           }
+	MULPD  X10, X2        // X_i     = {  real(a) * imag(x[i]),  imag(a) * imag(x[i]) }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDPD X3, SUM      // sum += result[i]
+	ADDQ  INC_X, X_PTR // X_PTR += incX
+	ADDQ  INC_Y, Y_PTR // Y_PTR += incY
+	DECQ  TAIL         // --TAIL
+	JNZ   dot_tail     // }  while TAIL > 0
+
+dot_end:
+	MOVUPS SUM, sum+88(FP)
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dotuunitary_amd64.s
@ -0,0 +1,130 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define MOVDDUP_XPTR_IDX_8__X3    LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
+#define MOVDDUP_16_XPTR_IDX_8__X5    LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
+#define MOVDDUP_32_XPTR_IDX_8__X7    LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
+#define MOVDDUP_48_XPTR_IDX_8__X9    LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
+
+#define MOVDDUP_XPTR_IIDX_8__X2    LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
+#define MOVDDUP_16_XPTR_IIDX_8__X4    LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
+#define MOVDDUP_32_XPTR_IIDX_8__X6    LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
+#define MOVDDUP_48_XPTR_IIDX_8__X8    LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
+
+#define ADDSUBPD_X2_X3    LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5    LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7    LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9    LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define SUM X0
+#define P_SUM X1
+#define IDX AX
+#define I_IDX DX
+
+// func DotuUnitary(x, y []complex128) (sum complex128)
+TEXT ·DotuUnitary(SB), NOSPLIT, $0
+	MOVQ    x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ    y_base+24(FP), Y_PTR // Y_PTR = &y
+	MOVQ    x_len+8(FP), LEN     // LEN = min( len(x), len(y) )
+	CMPQ    y_len+32(FP), LEN
+	CMOVQLE y_len+32(FP), LEN
+	PXOR    SUM, SUM             // SUM = 0
+	CMPQ    LEN, $0              // if LEN == 0 { return }
+	JE      dot_end
+	PXOR    P_SUM, P_SUM         // P_SUM = 0
+	XORQ    IDX, IDX             // IDX = 0
+	MOVQ    $1, DX               // j = 1
+	MOVQ    LEN, TAIL
+	ANDQ    $3, TAIL             // TAIL = floor( LEN / 4 )
+	SHRQ    $2, LEN              // LEN = LEN % 4
+	JZ      dot_tail             // if LEN == 0 { goto dot_tail }
+
+dot_loop: // do {
+	MOVDDUP_XPTR_IDX_8__X3    // X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_16_XPTR_IDX_8__X5
+	MOVDDUP_32_XPTR_IDX_8__X7
+	MOVDDUP_48_XPTR_IDX_8__X9
+
+	MOVDDUP_XPTR_IIDX_8__X2    // X_i = { imag(x[i]), imag(x[i]) }
+	MOVDDUP_16_XPTR_IIDX_8__X4
+	MOVDDUP_32_XPTR_IIDX_8__X6
+	MOVDDUP_48_XPTR_IIDX_8__X8
+
+	// X_j = { imag(y[i]), real(y[i]) }
+	MOVUPS (Y_PTR)(IDX*8), X10
+	MOVUPS 16(Y_PTR)(IDX*8), X11
+	MOVUPS 32(Y_PTR)(IDX*8), X12
+	MOVUPS 48(Y_PTR)(IDX*8), X13
+
+	// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i])  }
+	MULPD X10, X3
+	MULPD X11, X5
+	MULPD X12, X7
+	MULPD X13, X9
+
+	// X_j = { real(y[i]), imag(y[i]) }
+	SHUFPD $0x1, X10, X10
+	SHUFPD $0x1, X11, X11
+	SHUFPD $0x1, X12, X12
+	SHUFPD $0x1, X13, X13
+
+	// X_i     = { real(a) * imag(x[i]), imag(a) * imag(x[i])  }
+	MULPD X10, X2
+	MULPD X11, X4
+	MULPD X12, X6
+	MULPD X13, X8
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	// psum += result[i]
+	ADDPD X3, SUM
+	ADDPD X5, P_SUM
+	ADDPD X7, SUM
+	ADDPD X9, P_SUM
+
+	ADDQ  $8, IDX    // IDX += 8
+	ADDQ  $8, I_IDX  // I_IDX += 8
+	DECQ  LEN
+	JNZ   dot_loop   // } while --LEN > 0
+	ADDPD P_SUM, SUM // SUM += P_SUM
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail: // do {
+	MOVDDUP_XPTR_IDX_8__X3     // X_(i+1) = { real(x[i]            , real(x[i])            }
+	MOVDDUP_XPTR_IIDX_8__X2    // X_i     = { imag(x[i])           , imag(x[i])            }
+	MOVUPS (Y_PTR)(IDX*8), X10 // X_j     = {  imag(y[i])          ,  real(y[i])           }
+	MULPD  X10, X3             // X_(i+1) = {  imag(a) * real(x[i]),  real(a) * real(x[i]) }
+	SHUFPD $0x1, X10, X10      // X_j     = {  real(y[i])          ,  imag(y[i])           }
+	MULPD  X10, X2             // X_i     = {  real(a) * imag(x[i]),  imag(a) * imag(x[i]) }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(a)*real(x[i]) + real(a)*imag(x[i]),
+	//	real(result[i]):  real(a)*real(x[i]) - imag(a)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDPD X3, SUM   // psum += result[i]
+	ADDQ  $2, IDX   // IDX += 2
+	ADDQ  $2, I_IDX // I_IDX += 2
+	DECQ  TAIL      // --TAIL
+	JNZ   dot_tail  // }  while TAIL > 0
+
+dot_end:
+	MOVUPS SUM, sum+48(FP)
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalinc_amd64.s
@ -0,0 +1,69 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define SRC SI
+#define DST SI
+#define LEN CX
+#define TAIL BX
+#define INC R9
+#define INC3 R10
+#define ALPHA X0
+#define ALPHA_2 X1
+
+#define MOVDDUP_ALPHA    LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
+
+// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
+TEXT ·DscalInc(SB), NOSPLIT, $0
+	MOVQ x_base+8(FP), SRC // SRC = &x
+	MOVQ n+32(FP), LEN     // LEN = n
+	CMPQ LEN, $0           // if LEN == 0 { return }
+	JE   dscal_end
+
+	MOVDDUP_ALPHA             // ALPHA = alpha
+	MOVQ   inc+40(FP), INC    // INC = inc
+	SHLQ   $4, INC            // INC = INC * sizeof(complex128)
+	LEAQ   (INC)(INC*2), INC3 // INC3 = 3 * INC
+	MOVUPS ALPHA, ALPHA_2     // Copy ALPHA and ALPHA_2 for pipelining
+	MOVQ   LEN, TAIL          // TAIL = LEN
+	SHRQ   $2, LEN            // LEN = floor( n / 4 )
+	JZ     dscal_tail         // if LEN == 0 { goto dscal_tail }
+
+dscal_loop: // do {
+	MOVUPS (SRC), X2         // X_i = x[i]
+	MOVUPS (SRC)(INC*1), X3
+	MOVUPS (SRC)(INC*2), X4
+	MOVUPS (SRC)(INC3*1), X5
+
+	MULPD ALPHA, X2   // X_i *= ALPHA
+	MULPD ALPHA_2, X3
+	MULPD ALPHA, X4
+	MULPD ALPHA_2, X5
+
+	MOVUPS X2, (DST)         // x[i] = X_i
+	MOVUPS X3, (DST)(INC*1)
+	MOVUPS X4, (DST)(INC*2)
+	MOVUPS X5, (DST)(INC3*1)
+
+	LEAQ (SRC)(INC*4), SRC // SRC += INC*4
+	DECQ LEN
+	JNZ  dscal_loop        // } while --LEN > 0
+
+dscal_tail:
+	ANDQ $3, TAIL  // TAIL = TAIL % 4
+	JE   dscal_end // if TAIL == 0 { return }
+
+dscal_tail_loop: // do {
+	MOVUPS (SRC), X2       // X_i = x[i]
+	MULPD  ALPHA, X2       // X_i *= ALPHA
+	MOVUPS X2, (DST)       // x[i] = X_i
+	ADDQ   INC, SRC        // SRC += INC
+	DECQ   TAIL
+	JNZ    dscal_tail_loop // } while --TAIL > 0
+
+dscal_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/dscalunitary_amd64.s
@ -0,0 +1,66 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define SRC SI
+#define DST SI
+#define LEN CX
+#define IDX AX
+#define TAIL BX
+#define ALPHA X0
+#define ALPHA_2 X1
+
+#define MOVDDUP_ALPHA    LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
+
+// func DscalUnitary(alpha float64, x []complex128)
+TEXT ·DscalUnitary(SB), NOSPLIT, $0
+	MOVQ x_base+8(FP), SRC // SRC = &x
+	MOVQ x_len+16(FP), LEN // LEN = len(x)
+	CMPQ LEN, $0           // if LEN == 0 { return }
+	JE   dscal_end
+
+	MOVDDUP_ALPHA         // ALPHA = alpha
+	XORQ   IDX, IDX       // IDX = 0
+	MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
+	MOVQ   LEN, TAIL      // TAIL = LEN
+	SHRQ   $2, LEN        // LEN = floor( n / 4 )
+	JZ     dscal_tail     // if LEN == 0 { goto dscal_tail }
+
+dscal_loop: // do {
+	MOVUPS (SRC)(IDX*8), X2   // X_i = x[i]
+	MOVUPS 16(SRC)(IDX*8), X3
+	MOVUPS 32(SRC)(IDX*8), X4
+	MOVUPS 48(SRC)(IDX*8), X5
+
+	MULPD ALPHA, X2   // X_i *= ALPHA
+	MULPD ALPHA_2, X3
+	MULPD ALPHA, X4
+	MULPD ALPHA_2, X5
+
+	MOVUPS X2, (DST)(IDX*8)   // x[i] = X_i
+	MOVUPS X3, 16(DST)(IDX*8)
+	MOVUPS X4, 32(DST)(IDX*8)
+	MOVUPS X5, 48(DST)(IDX*8)
+
+	ADDQ $8, IDX    // IDX += 8
+	DECQ LEN
+	JNZ  dscal_loop // } while --LEN > 0
+
+dscal_tail:
+	ANDQ $3, TAIL  // TAIL = TAIL % 4
+	JZ   dscal_end // if TAIL == 0 { return }
+
+dscal_tail_loop: // do {
+	MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
+	MULPD  ALPHA, X2        // X_i *= ALPHA
+	MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
+	ADDQ   $2, IDX          // IDX += 2
+	DECQ   TAIL
+	JNZ    dscal_tail_loop  // } while --TAIL > 0
+
+dscal_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go
@ -0,0 +1,31 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package c128
+
+// ScalUnitaryTo is
+//  for i, v := range x {
+//  	dst[i] = alpha * v
+//  }
+func ScalUnitaryTo(dst []complex128, alpha complex128, x []complex128) {
+	for i, v := range x {
+		dst[i] = alpha * v
+	}
+}
+
+// ScalIncTo is
+//  var idst, ix uintptr
+//  for i := 0; i < int(n); i++ {
+//  	dst[idst] = alpha * x[ix]
+//  	ix += incX
+//  	idst += incDst
+//  }
+func ScalIncTo(dst []complex128, incDst uintptr, alpha complex128, x []complex128, n, incX uintptr) {
+	var idst, ix uintptr
+	for i := 0; i < int(n); i++ {
+		dst[idst] = alpha * x[ix]
+		ix += incX
+		idst += incDst
+	}
+}
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/scalUnitary_amd64.s
@ -0,0 +1,116 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define SRC SI
+#define DST SI
+#define LEN CX
+#define IDX AX
+#define TAIL BX
+#define ALPHA X0
+#define ALPHA_C X1
+#define ALPHA2 X10
+#define ALPHA_C2 X11
+
+#define MOVDDUP_X2_X3    LONG $0xDA120FF2 // MOVDDUP X2, X3
+#define MOVDDUP_X4_X5    LONG $0xEC120FF2 // MOVDDUP X4, X5
+#define MOVDDUP_X6_X7    LONG $0xFE120FF2 // MOVDDUP X6, X7
+#define MOVDDUP_X8_X9    LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
+
+#define ADDSUBPD_X2_X3    LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5    LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7    LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9    LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+// func ScalUnitary(alpha complex128, x []complex128)
+TEXT ·ScalUnitary(SB), NOSPLIT, $0
+	MOVQ x_base+16(FP), SRC // SRC = &x
+	MOVQ x_len+24(FP), LEN  // LEN = len(x)
+	CMPQ LEN, $0            // if LEN == 0 { return }
+	JE   scal_end
+
+	MOVUPS alpha+0(FP), ALPHA     // ALPHA = { imag(alpha), real(alpha) }
+	MOVAPS ALPHA, ALPHA_C
+	SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
+
+	XORQ   IDX, IDX          // IDX = 0
+	MOVAPS ALPHA, ALPHA2     // Copy ALPHA and ALPHA_C for pipelining
+	MOVAPS ALPHA_C, ALPHA_C2
+	MOVQ   LEN, TAIL
+	SHRQ   $2, LEN           // LEN = floor( n / 4 )
+	JZ     scal_tail         // if BX == 0 { goto scal_tail }
+
+scal_loop: // do {
+	MOVUPS (SRC)(IDX*8), X2   // X_i = { imag(x[i]), real(x[i]) }
+	MOVUPS 16(SRC)(IDX*8), X4
+	MOVUPS 32(SRC)(IDX*8), X6
+	MOVUPS 48(SRC)(IDX*8), X8
+
+	// X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_X2_X3
+	MOVDDUP_X4_X5
+	MOVDDUP_X6_X7
+	MOVDDUP_X8_X9
+
+	// X_i = { imag(x[i]), imag(x[i]) }
+	SHUFPD $0x3, X2, X2
+	SHUFPD $0x3, X4, X4
+	SHUFPD $0x3, X6, X6
+	SHUFPD $0x3, X8, X8
+
+	// X_i     = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i])  }
+	// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i])  }
+	MULPD ALPHA_C, X2
+	MULPD ALPHA, X3
+	MULPD ALPHA_C2, X4
+	MULPD ALPHA2, X5
+	MULPD ALPHA_C, X6
+	MULPD ALPHA, X7
+	MULPD ALPHA_C2, X8
+	MULPD ALPHA2, X9
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
+	//	real(result[i]):  real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	MOVUPS X3, (DST)(IDX*8)   // x[i] = X_(i+1)
+	MOVUPS X5, 16(DST)(IDX*8)
+	MOVUPS X7, 32(DST)(IDX*8)
+	MOVUPS X9, 48(DST)(IDX*8)
+	ADDQ   $8, IDX            // IDX += 8
+	DECQ   LEN
+	JNZ    scal_loop          // } while --LEN > 0
+
+scal_tail:
+	ANDQ $3, TAIL // TAIL = TAIL % 4
+	JZ   scal_end // if TAIL == 0 { return }
+
+scal_tail_loop: // do {
+	MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
+	MOVDDUP_X2_X3           // X_(i+1) = { real(x[i], real(x[i]) }
+	SHUFPD $0x3, X2, X2     // X_i = { imag(x[i]), imag(x[i]) }
+	MULPD  ALPHA_C, X2      // X_i     = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i])  }
+	MULPD  ALPHA, X3        // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i])  }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
+	//	real(result[i]):  real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+
+	MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
+	ADDQ   $2, IDX          // IDX += 2
+	DECQ   TAIL
+	JNZ    scal_tail_loop   // }  while --LEN > 0
+
+scal_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/scalinc_amd64.s
@ -0,0 +1,121 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define SRC SI
+#define DST SI
+#define LEN CX
+#define TAIL BX
+#define INC R9
+#define INC3 R10
+#define ALPHA X0
+#define ALPHA_C X1
+#define ALPHA2 X10
+#define ALPHA_C2 X11
+
+#define MOVDDUP_X2_X3    LONG $0xDA120FF2 // MOVDDUP X2, X3
+#define MOVDDUP_X4_X5    LONG $0xEC120FF2 // MOVDDUP X4, X5
+#define MOVDDUP_X6_X7    LONG $0xFE120FF2 // MOVDDUP X6, X7
+#define MOVDDUP_X8_X9    LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
+
+#define ADDSUBPD_X2_X3    LONG $0xDAD00F66 // ADDSUBPD X2, X3
+#define ADDSUBPD_X4_X5    LONG $0xECD00F66 // ADDSUBPD X4, X5
+#define ADDSUBPD_X6_X7    LONG $0xFED00F66 // ADDSUBPD X6, X7
+#define ADDSUBPD_X8_X9    LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
+
+// func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
+TEXT ·ScalInc(SB), NOSPLIT, $0
+	MOVQ x_base+16(FP), SRC // SRC = &x
+	MOVQ n+40(FP), LEN      // LEN = len(x)
+	CMPQ LEN, $0
+	JE   scal_end           // if LEN == 0 { return }
+
+	MOVQ inc+48(FP), INC    // INC = inc
+	SHLQ $4, INC            // INC = INC * sizeof(complex128)
+	LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
+
+	MOVUPS alpha+0(FP), ALPHA     // ALPHA = { imag(alpha), real(alpha) }
+	MOVAPS ALPHA, ALPHA_C
+	SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
+
+	MOVAPS ALPHA, ALPHA2     // Copy ALPHA and ALPHA_C for pipelining
+	MOVAPS ALPHA_C, ALPHA_C2
+	MOVQ   LEN, TAIL
+	SHRQ   $2, LEN           // LEN = floor( n / 4 )
+	JZ     scal_tail         // if BX == 0 { goto scal_tail }
+
+scal_loop: // do {
+	MOVUPS (SRC), X2         // X_i = { imag(x[i]), real(x[i]) }
+	MOVUPS (SRC)(INC*1), X4
+	MOVUPS (SRC)(INC*2), X6
+	MOVUPS (SRC)(INC3*1), X8
+
+	// X_(i+1) = { real(x[i], real(x[i]) }
+	MOVDDUP_X2_X3
+	MOVDDUP_X4_X5
+	MOVDDUP_X6_X7
+	MOVDDUP_X8_X9
+
+	// X_i = { imag(x[i]), imag(x[i]) }
+	SHUFPD $0x3, X2, X2
+	SHUFPD $0x3, X4, X4
+	SHUFPD $0x3, X6, X6
+	SHUFPD $0x3, X8, X8
+
+	// X_i     = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i])  }
+	// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i])  }
+	MULPD ALPHA_C, X2
+	MULPD ALPHA, X3
+	MULPD ALPHA_C2, X4
+	MULPD ALPHA2, X5
+	MULPD ALPHA_C, X6
+	MULPD ALPHA, X7
+	MULPD ALPHA_C2, X8
+	MULPD ALPHA2, X9
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
+	//	real(result[i]):  real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+	ADDSUBPD_X4_X5
+	ADDSUBPD_X6_X7
+	ADDSUBPD_X8_X9
+
+	MOVUPS X3, (DST)         // x[i] = X_(i+1)
+	MOVUPS X5, (DST)(INC*1)
+	MOVUPS X7, (DST)(INC*2)
+	MOVUPS X9, (DST)(INC3*1)
+
+	LEAQ (SRC)(INC*4), SRC // SRC = &(SRC[inc*4])
+	DECQ LEN
+	JNZ  scal_loop         // } while --BX > 0
+
+scal_tail:
+	ANDQ $3, TAIL // TAIL = TAIL % 4
+	JE   scal_end // if TAIL == 0 { return }
+
+scal_tail_loop: // do {
+	MOVUPS (SRC), X2    // X_i = { imag(x[i]), real(x[i]) }
+	MOVDDUP_X2_X3       // X_(i+1) = { real(x[i], real(x[i]) }
+	SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
+	MULPD  ALPHA_C, X2  // X_i     = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i])  }
+	MULPD  ALPHA, X3    // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i])  }
+
+	// X_(i+1) = {
+	//	imag(result[i]):  imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
+	//	real(result[i]):  real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
+	//  }
+	ADDSUBPD_X2_X3
+
+	MOVUPS X3, (DST)      // x[i] = X_i
+	ADDQ   INC, SRC       // SRC = &(SRC[incX])
+	DECQ   TAIL
+	JNZ    scal_tail_loop // } while --TAIL > 0
+
+scal_end:
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_amd64.go
@ -0,0 +1,96 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+package c128
+
+// AxpyUnitary is
+//  for i, v := range x {
+//  	y[i] += alpha * v
+//  }
+func AxpyUnitary(alpha complex128, x, y []complex128)
+
+// AxpyUnitaryTo is
+//  for i, v := range x {
+//  	dst[i] = alpha*v + y[i]
+//  }
+func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
+
+// AxpyInc is
+//  for i := 0; i < int(n); i++ {
+//  	y[iy] += alpha * x[ix]
+//  	ix += incX
+//  	iy += incY
+//  }
+func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
+
+// AxpyIncTo is
+//  for i := 0; i < int(n); i++ {
+//  	dst[idst] = alpha*x[ix] + y[iy]
+//  	ix += incX
+//  	iy += incY
+//  	idst += incDst
+//  }
+func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
+
+// DscalUnitary is
+//  for i, v := range x {
+//  	x[i] = complex(real(v)*alpha, imag(v)*alpha)
+//  }
+func DscalUnitary(alpha float64, x []complex128)
+
+// DscalInc is
+//  var ix uintptr
+//  for i := 0; i < int(n); i++ {
+//  	x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
+//  	ix += inc
+//  }
+func DscalInc(alpha float64, x []complex128, n, inc uintptr)
+
+// ScalInc is
+//  var ix uintptr
+//  for i := 0; i < int(n); i++ {
+//  	x[ix] *= alpha
+//  	ix += incX
+//  }
+func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
+
+// ScalUnitary is
+//  for i := range x {
+//  	x[i] *= alpha
+//  }
+func ScalUnitary(alpha complex128, x []complex128)
+
+// DotcUnitary is
+//  for i, v := range x {
+//  	sum += y[i] * cmplx.Conj(v)
+//  }
+//  return sum
+func DotcUnitary(x, y []complex128) (sum complex128)
+
+// DotcInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += y[iy] * cmplx.Conj(x[ix])
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return sum
+func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
+
+// DotuUnitary is
+//  for i, v := range x {
+//  	sum += y[i] * v
+//  }
+//  return sum
+func DotuUnitary(x, y []complex128) (sum complex128)
+
+// DotuInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += y[iy] * x[ix]
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return sum
+func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
--- a/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/c128/stubs_noasm.go
@ -0,0 +1,163 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !amd64 noasm appengine
+
+package c128
+
+import "math/cmplx"
+
+// AxpyUnitary is
+//  for i, v := range x {
+//  	y[i] += alpha * v
+//  }
+func AxpyUnitary(alpha complex128, x, y []complex128) {
+	for i, v := range x {
+		y[i] += alpha * v
+	}
+}
+
+// AxpyUnitaryTo is
+//  for i, v := range x {
+//  	dst[i] = alpha*v + y[i]
+//  }
+func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) {
+	for i, v := range x {
+		dst[i] = alpha*v + y[i]
+	}
+}
+
+// AxpyInc is
+//  for i := 0; i < int(n); i++ {
+//  	y[iy] += alpha * x[ix]
+//  	ix += incX
+//  	iy += incY
+//  }
+func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
+	for i := 0; i < int(n); i++ {
+		y[iy] += alpha * x[ix]
+		ix += incX
+		iy += incY
+	}
+}
+
+// AxpyIncTo is
+//  for i := 0; i < int(n); i++ {
+//  	dst[idst] = alpha*x[ix] + y[iy]
+//  	ix += incX
+//  	iy += incY
+//  	idst += incDst
+//  }
+func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
+	for i := 0; i < int(n); i++ {
+		dst[idst] = alpha*x[ix] + y[iy]
+		ix += incX
+		iy += incY
+		idst += incDst
+	}
+}
+
+// DscalUnitary is
+//  for i, v := range x {
+//  	x[i] = complex(real(v)*alpha, imag(v)*alpha)
+//  }
+func DscalUnitary(alpha float64, x []complex128) {
+	for i, v := range x {
+		x[i] = complex(real(v)*alpha, imag(v)*alpha)
+	}
+}
+
+// DscalInc is
+//  var ix uintptr
+//  for i := 0; i < int(n); i++ {
+//  	x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
+//  	ix += inc
+//  }
+func DscalInc(alpha float64, x []complex128, n, inc uintptr) {
+	var ix uintptr
+	for i := 0; i < int(n); i++ {
+		x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
+		ix += inc
+	}
+}
+
+// ScalInc is
+//  var ix uintptr
+//  for i := 0; i < int(n); i++ {
+//  	x[ix] *= alpha
+//  	ix += incX
+//  }
+func ScalInc(alpha complex128, x []complex128, n, inc uintptr) {
+	var ix uintptr
+	for i := 0; i < int(n); i++ {
+		x[ix] *= alpha
+		ix += inc
+	}
+}
+
+// ScalUnitary is
+//  for i := range x {
+//  	x[i] *= alpha
+//  }
+func ScalUnitary(alpha complex128, x []complex128) {
+	for i := range x {
+		x[i] *= alpha
+	}
+}
+
+// DotcUnitary is
+//  for i, v := range x {
+//  	sum += y[i] * cmplx.Conj(v)
+//  }
+//  return sum
+func DotcUnitary(x, y []complex128) (sum complex128) {
+	for i, v := range x {
+		sum += y[i] * cmplx.Conj(v)
+	}
+	return sum
+}
+
+// DotcInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += y[iy] * cmplx.Conj(x[ix])
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return sum
+func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
+	for i := 0; i < int(n); i++ {
+		sum += y[iy] * cmplx.Conj(x[ix])
+		ix += incX
+		iy += incY
+	}
+	return sum
+}
+
+// DotuUnitary is
+//  for i, v := range x {
+//  	sum += y[i] * v
+//  }
+//  return sum
+func DotuUnitary(x, y []complex128) (sum complex128) {
+	for i, v := range x {
+		sum += y[i] * v
+	}
+	return sum
+}
+
+// DotuInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += y[iy] * x[ix]
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return sum
+func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
+	for i := 0; i < int(n); i++ {
+		sum += y[iy] * x[ix]
+		ix += incX
+		iy += incY
+	}
+	return sum
+}
--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyinc_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyincto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyincto_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitary_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitaryto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/axpyunitaryto_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotinc_amd64.s
@ -0,0 +1,91 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define INC_X R8
+#define INCx3_X R10
+#define INC_Y R9
+#define INCx3_Y R11
+#define SUM X0
+#define P_SUM X1
+
+// func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
+TEXT ·DdotInc(SB), NOSPLIT, $0
+	MOVQ x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
+	MOVQ n+48(FP), LEN        // LEN = n
+	PXOR SUM, SUM             // SUM = 0
+	CMPQ LEN, $0
+	JE   dot_end
+
+	MOVQ ix+72(FP), INC_X        // INC_X = ix
+	MOVQ iy+80(FP), INC_Y        // INC_Y = iy
+	LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
+	LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
+
+	MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(float32)
+	SHLQ $2, INC_X
+	MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(float32)
+	SHLQ $2, INC_Y
+
+	MOVQ LEN, TAIL
+	ANDQ $3, TAIL  // TAIL = LEN % 4
+	SHRQ $2, LEN   // LEN = floor( LEN / 4 )
+	JZ   dot_tail  // if LEN == 0 { goto dot_tail }
+
+	PXOR P_SUM, P_SUM              // P_SUM = 0  for pipelining
+	LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
+	LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
+
+dot_loop: // Loop unrolled 4x  do {
+	CVTSS2SD (X_PTR), X2            // X_i = x[i:i+1]
+	CVTSS2SD (X_PTR)(INC_X*1), X3
+	CVTSS2SD (X_PTR)(INC_X*2), X4
+	CVTSS2SD (X_PTR)(INCx3_X*1), X5
+
+	CVTSS2SD (Y_PTR), X6            // X_j = y[i:i+1]
+	CVTSS2SD (Y_PTR)(INC_Y*1), X7
+	CVTSS2SD (Y_PTR)(INC_Y*2), X8
+	CVTSS2SD (Y_PTR)(INCx3_Y*1), X9
+
+	MULSD X6, X2 // X_i *= X_j
+	MULSD X7, X3
+	MULSD X8, X4
+	MULSD X9, X5
+
+	ADDSD X2, SUM   // SUM += X_i
+	ADDSD X3, P_SUM
+	ADDSD X4, SUM
+	ADDSD X5, P_SUM
+
+	LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
+	LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
+
+	DECQ LEN
+	JNZ  dot_loop // } while --LEN > 0
+
+	ADDSD P_SUM, SUM // SUM += P_SUM
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail: // do {
+	CVTSS2SD (X_PTR), X2  // X2 = x[i]
+	CVTSS2SD (Y_PTR), X3  // X2 *= y[i]
+	MULSD    X3, X2
+	ADDSD    X2, SUM      // SUM += X2
+	ADDQ     INC_X, X_PTR // X_PTR += INC_X
+	ADDQ     INC_Y, Y_PTR // Y_PTR += INC_Y
+	DECQ     TAIL
+	JNZ      dot_tail     // } while --TAIL > 0
+
+dot_end:
+	MOVSD SUM, sum+88(FP) // return SUM
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/ddotunitary_amd64.s
@ -0,0 +1,110 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define HADDPD_SUM_SUM    LONG $0xC07C0F66 // @ HADDPD X0, X0
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define IDX AX
+#define SUM X0
+#define P_SUM X1
+
+// func DdotUnitary(x, y []float32) (sum float32)
+TEXT ·DdotUnitary(SB), NOSPLIT, $0
+	MOVQ    x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ    y_base+24(FP), Y_PTR // Y_PTR = &y
+	MOVQ    x_len+8(FP), LEN     // LEN = min( len(x), len(y) )
+	CMPQ    y_len+32(FP), LEN
+	CMOVQLE y_len+32(FP), LEN
+	PXOR    SUM, SUM             // psum = 0
+	CMPQ    LEN, $0
+	JE      dot_end
+
+	XORQ IDX, IDX
+	MOVQ Y_PTR, DX
+	ANDQ $0xF, DX    // Align on 16-byte boundary for ADDPS
+	JZ   dot_no_trim // if DX == 0 { goto dot_no_trim }
+
+	SUBQ $16, DX
+
+dot_align: // Trim first value(s) in unaligned buffer  do {
+	CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
+	CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
+	MULSD    X3, X2
+	ADDSD    X2, SUM            // SUM += X2
+	INCQ     IDX                // IDX++
+	DECQ     LEN
+	JZ       dot_end            // if --TAIL == 0 { return }
+	ADDQ     $4, DX
+	JNZ      dot_align          // } while --LEN > 0
+
+dot_no_trim:
+	PXOR P_SUM, P_SUM   // P_SUM = 0  for pipelining
+	MOVQ LEN, TAIL
+	ANDQ $0x7, TAIL     // TAIL = LEN % 8
+	SHRQ $3, LEN        // LEN = floor( LEN / 8 )
+	JZ   dot_tail_start // if LEN == 0 { goto dot_tail_start }
+
+dot_loop: // Loop unrolled 8x  do {
+	CVTPS2PD (X_PTR)(IDX*4), X2   // X_i = x[i:i+1]
+	CVTPS2PD 8(X_PTR)(IDX*4), X3
+	CVTPS2PD 16(X_PTR)(IDX*4), X4
+	CVTPS2PD 24(X_PTR)(IDX*4), X5
+
+	CVTPS2PD (Y_PTR)(IDX*4), X6   // X_j = y[i:i+1]
+	CVTPS2PD 8(Y_PTR)(IDX*4), X7
+	CVTPS2PD 16(Y_PTR)(IDX*4), X8
+	CVTPS2PD 24(Y_PTR)(IDX*4), X9
+
+	MULPD X6, X2 // X_i *= X_j
+	MULPD X7, X3
+	MULPD X8, X4
+	MULPD X9, X5
+
+	ADDPD X2, SUM   // SUM += X_i
+	ADDPD X3, P_SUM
+	ADDPD X4, SUM
+	ADDPD X5, P_SUM
+
+	ADDQ $8, IDX  // IDX += 8
+	DECQ LEN
+	JNZ  dot_loop // } while --LEN > 0
+
+	ADDPD P_SUM, SUM // SUM += P_SUM
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail_start:
+	MOVQ TAIL, LEN
+	SHRQ $1, LEN
+	JZ   dot_tail_one
+
+dot_tail_two:
+	CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
+	CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
+	MULPD    X6, X2             // X_i *= X_j
+	ADDPD    X2, SUM            // SUM += X_i
+	ADDQ     $2, IDX            // IDX += 2
+	DECQ     LEN
+	JNZ      dot_tail_two       // } while --LEN > 0
+
+	ANDQ $1, TAIL
+	JZ   dot_end
+
+dot_tail_one:
+	CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
+	CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
+	MULSD    X3, X2             // X2 *= X3
+	ADDSD    X2, SUM            // SUM += X2
+
+dot_end:
+	HADDPD_SUM_SUM        // SUM = \sum{ SUM[i] }
+	MOVSD SUM, sum+48(FP) // return SUM
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/doc.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/doc.go
@ -1,6 +1,6 @@
-// Copyright ©2017 The gonum Authors. All rights reserved.
+// Copyright ©2017 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package f32 provides float32 vector primitives.
-package f32
+package f32 // import "gonum.org/v1/gonum/internal/asm/f32"
--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/dotinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/dotinc_amd64.s
@ -0,0 +1,85 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define INC_X R8
+#define INCx3_X R10
+#define INC_Y R9
+#define INCx3_Y R11
+#define SUM X0
+#define P_SUM X1
+
+// func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
+TEXT ·DotInc(SB), NOSPLIT, $0
+	MOVQ x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
+	PXOR SUM, SUM             // SUM = 0
+	MOVQ n+48(FP), LEN        // LEN = n
+	CMPQ LEN, $0
+	JE   dot_end
+
+	MOVQ ix+72(FP), INC_X        // INC_X = ix
+	MOVQ iy+80(FP), INC_Y        // INC_Y = iy
+	LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
+	LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
+
+	MOVQ incX+56(FP), INC_X // INC_X := incX * sizeof(float32)
+	SHLQ $2, INC_X
+	MOVQ incY+64(FP), INC_Y // INC_Y := incY * sizeof(float32)
+	SHLQ $2, INC_Y
+
+	MOVQ LEN, TAIL
+	ANDQ $0x3, TAIL // TAIL = LEN % 4
+	SHRQ $2, LEN    // LEN = floor( LEN / 4 )
+	JZ   dot_tail   // if LEN == 0 { goto dot_tail }
+
+	PXOR P_SUM, P_SUM              // P_SUM = 0  for pipelining
+	LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
+	LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
+
+dot_loop: // Loop unrolled 4x  do {
+	MOVSS (X_PTR), X2            // X_i = x[i:i+1]
+	MOVSS (X_PTR)(INC_X*1), X3
+	MOVSS (X_PTR)(INC_X*2), X4
+	MOVSS (X_PTR)(INCx3_X*1), X5
+
+	MULSS (Y_PTR), X2            // X_i *= y[i:i+1]
+	MULSS (Y_PTR)(INC_Y*1), X3
+	MULSS (Y_PTR)(INC_Y*2), X4
+	MULSS (Y_PTR)(INCx3_Y*1), X5
+
+	ADDSS X2, SUM   // SUM += X_i
+	ADDSS X3, P_SUM
+	ADDSS X4, SUM
+	ADDSS X5, P_SUM
+
+	LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
+	LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
+
+	DECQ LEN
+	JNZ  dot_loop // } while --LEN > 0
+
+	ADDSS P_SUM, SUM // P_SUM += SUM
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail: // do {
+	MOVSS (X_PTR), X2  // X2 = x[i]
+	MULSS (Y_PTR), X2  // X2 *= y[i]
+	ADDSS X2, SUM      // SUM += X2
+	ADDQ  INC_X, X_PTR // X_PTR += INC_X
+	ADDQ  INC_Y, Y_PTR // Y_PTR += INC_Y
+	DECQ  TAIL
+	JNZ   dot_tail     // } while --TAIL > 0
+
+dot_end:
+	MOVSS SUM, sum+88(FP) // return SUM
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/dotunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/dotunitary_amd64.s
@ -0,0 +1,106 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !noasm,!appengine
+
+#include "textflag.h"
+
+#define HADDPS_SUM_SUM    LONG $0xC07C0FF2 // @ HADDPS X0, X0
+
+#define X_PTR SI
+#define Y_PTR DI
+#define LEN CX
+#define TAIL BX
+#define IDX AX
+#define SUM X0
+#define P_SUM X1
+
+// func DotUnitary(x, y []float32) (sum float32)
+TEXT ·DotUnitary(SB), NOSPLIT, $0
+	MOVQ    x_base+0(FP), X_PTR  // X_PTR = &x
+	MOVQ    y_base+24(FP), Y_PTR // Y_PTR = &y
+	PXOR    SUM, SUM             // SUM = 0
+	MOVQ    x_len+8(FP), LEN     // LEN = min( len(x), len(y) )
+	CMPQ    y_len+32(FP), LEN
+	CMOVQLE y_len+32(FP), LEN
+	CMPQ    LEN, $0
+	JE      dot_end
+
+	XORQ IDX, IDX
+	MOVQ Y_PTR, DX
+	ANDQ $0xF, DX    // Align on 16-byte boundary for MULPS
+	JZ   dot_no_trim // if DX == 0 { goto dot_no_trim }
+	SUBQ $16, DX
+
+dot_align: // Trim first value(s) in unaligned buffer  do {
+	MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
+	MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
+	ADDSS X2, SUM            // SUM += X2
+	INCQ  IDX                // IDX++
+	DECQ  LEN
+	JZ    dot_end            // if --TAIL == 0 { return }
+	ADDQ  $4, DX
+	JNZ   dot_align          // } while --DX > 0
+
+dot_no_trim:
+	PXOR P_SUM, P_SUM    // P_SUM = 0  for pipelining
+	MOVQ LEN, TAIL
+	ANDQ $0xF, TAIL      // TAIL = LEN % 16
+	SHRQ $4, LEN         // LEN = floor( LEN / 16 )
+	JZ   dot_tail4_start // if LEN == 0 { goto dot_tail4_start }
+
+dot_loop: // Loop unrolled 16x  do {
+	MOVUPS (X_PTR)(IDX*4), X2   // X_i = x[i:i+1]
+	MOVUPS 16(X_PTR)(IDX*4), X3
+	MOVUPS 32(X_PTR)(IDX*4), X4
+	MOVUPS 48(X_PTR)(IDX*4), X5
+
+	MULPS (Y_PTR)(IDX*4), X2   // X_i *= y[i:i+1]
+	MULPS 16(Y_PTR)(IDX*4), X3
+	MULPS 32(Y_PTR)(IDX*4), X4
+	MULPS 48(Y_PTR)(IDX*4), X5
+
+	ADDPS X2, SUM   // SUM += X_i
+	ADDPS X3, P_SUM
+	ADDPS X4, SUM
+	ADDPS X5, P_SUM
+
+	ADDQ $16, IDX // IDX += 16
+	DECQ LEN
+	JNZ  dot_loop // } while --LEN > 0
+
+	ADDPS P_SUM, SUM // SUM += P_SUM
+	CMPQ  TAIL, $0   // if TAIL == 0 { return }
+	JE    dot_end
+
+dot_tail4_start: // Reset loop counter for 4-wide tail loop
+	MOVQ TAIL, LEN      // LEN = floor( TAIL / 4 )
+	SHRQ $2, LEN
+	JZ   dot_tail_start // if LEN == 0 { goto dot_tail_start }
+
+dot_tail4_loop: // Loop unrolled 4x  do {
+	MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
+	MULPS  (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
+	ADDPS  X2, SUM            // SUM += X_i
+	ADDQ   $4, IDX            // i += 4
+	DECQ   LEN
+	JNZ    dot_tail4_loop     // } while --LEN > 0
+
+dot_tail_start: // Reset loop counter for 1-wide tail loop
+	ANDQ $3, TAIL // TAIL = TAIL % 4
+	JZ   dot_end  // if TAIL == 0 { return }
+
+dot_tail: // do {
+	MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
+	MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
+	ADDSS X2, SUM            // psum += X2
+	INCQ  IDX                // IDX++
+	DECQ  TAIL
+	JNZ   dot_tail           // } while --TAIL > 0
+
+dot_end:
+	HADDPS_SUM_SUM        // SUM = \sum{ SUM[i] }
+	HADDPS_SUM_SUM
+	MOVSS SUM, sum+48(FP) // return SUM
+	RET
--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/scal.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/scal.go
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_amd64.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_amd64.go
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

@ -34,3 +34,35 @@ func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
 //  	idst += incDst
 //  }
 func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
+
+// DdotUnitary is
+//  for i, v := range x {
+//  	sum += float64(y[i]) * float64(v)
+//  }
+//  return
+func DdotUnitary(x, y []float32) (sum float64)
+
+// DdotInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += float64(y[iy]) * float64(x[ix])
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return
+func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
+
+// DotUnitary is
+//  for i, v := range x {
+//  	sum += y[i] * v
+//  }
+//  return sum
+func DotUnitary(x, y []float32) (sum float32)
+
+// DotInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += y[iy] * x[ix]
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return sum
+func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
--- a/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_noasm.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f32/stubs_noasm.go
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

@ -55,3 +55,59 @@ func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float3
 		idst += incDst
 	}
 }
+
+// DotUnitary is
+//  for i, v := range x {
+//  	sum += y[i] * v
+//  }
+//  return sum
+func DotUnitary(x, y []float32) (sum float32) {
+	for i, v := range x {
+		sum += y[i] * v
+	}
+	return sum
+}
+
+// DotInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += y[iy] * x[ix]
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return sum
+func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
+	for i := 0; i < int(n); i++ {
+		sum += y[iy] * x[ix]
+		ix += incX
+		iy += incY
+	}
+	return sum
+}
+
+// DdotUnitary is
+//  for i, v := range x {
+//  	sum += float64(y[i]) * float64(v)
+//  }
+//  return
+func DdotUnitary(x, y []float32) (sum float64) {
+	for i, v := range x {
+		sum += float64(y[i]) * float64(v)
+	}
+	return
+}
+
+// DdotInc is
+//  for i := 0; i < int(n); i++ {
+//  	sum += float64(y[iy]) * float64(x[ix])
+//  	ix += incX
+//  	iy += incY
+//  }
+//  return
+func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
+	for i := 0; i < int(n); i++ {
+		sum += float64(y[iy]) * float64(x[ix])
+		ix += incX
+		iy += incY
+	}
+	return
+}
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/abssum_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/abssum_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/abssuminc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/abssuminc_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/add_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/add_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/addconst_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/addconst_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpy.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpy.go
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyinc_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyincto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyincto_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitary_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitaryto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/axpyunitaryto_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/cumprod_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/cumprod_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/cumsum_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/cumsum_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/div_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/div_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/divto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/divto_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/doc.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/doc.go
@ -1,6 +1,6 @@
-// Copyright ©2017 The gonum Authors. All rights reserved.
+// Copyright ©2017 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package f64 provides float64 vector primitives.
-package f64
+package f64 // import "gonum.org/v1/gonum/internal/asm/f64"
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/dot.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/dot.go
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/dot_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/dot_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/l1norm_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/l1norm_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/linfnorm_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/linfnorm_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/scal.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scal.go
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalinc_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalinc_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalincto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalincto_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitary_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitary_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitaryto_amd64.s
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/scalunitaryto_amd64.s
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_amd64.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_amd64.go
@ -1,4 +1,4 @@
-// Copyright ©2015 The gonum Authors. All rights reserved.
+// Copyright ©2015 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_noasm.go
+++ b/vendor/gonum.org/v1/gonum/internal/asm/f64/stubs_noasm.go
@ -1,4 +1,4 @@
-// Copyright ©2016 The gonum Authors. All rights reserved.
+// Copyright ©2016 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

--- a/Show More
+++ b/Show More