vendor: update import paths to "gonum.org/v1"

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
This commit is contained in:
Gyuho Lee 2018-01-09 12:37:46 -08:00
parent f4955798bc
commit e7294311a0
302 changed files with 10101 additions and 4365 deletions

104
Gopkg.lock generated
View File

@ -99,70 +99,6 @@
revision = "1e59b77b52bf8e4b449a57e6f79f21226d571845"
source = "https://github.com/golang/protobuf"
[[projects]]
branch = "master"
name = "github.com/gonum/blas"
packages = [
".",
"blas64",
"native",
"native/internal/math32"
]
revision = "37e82626499e1df7c54aeaba0959fd6e7e8dc1e4"
[[projects]]
branch = "master"
name = "github.com/gonum/floats"
packages = ["."]
revision = "f74b330d45c56584a6ea7a27f5c64ea2900631e9"
[[projects]]
branch = "master"
name = "github.com/gonum/internal"
packages = [
"asm/f32",
"asm/f64"
]
revision = "e57e4534cf9b3b00ef6c0175f59d8d2d34f60914"
[[projects]]
branch = "master"
name = "github.com/gonum/lapack"
packages = [
".",
"lapack64",
"native"
]
revision = "5ed4b826becd1807e09377508f51756586d1a98c"
[[projects]]
branch = "master"
name = "github.com/gonum/matrix"
packages = [
".",
"mat64"
]
revision = "dd6034299e4242c9f0ea36735e6d4264dfcb3f9f"
[[projects]]
name = "github.com/gonum/plot"
packages = [
".",
"palette",
"plotter",
"plotutil",
"tools/bezier",
"vg",
"vg/draw",
"vg/fonts",
"vg/vgeps",
"vg/vgimg",
"vg/vgpdf",
"vg/vgsvg"
]
revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
source = "https://github.com/gonum/plot"
[[projects]]
name = "github.com/googleapis/gax-go"
packages = ["."]
@ -344,6 +280,44 @@
revision = "6dc17368e09b0e8634d71cac8168d853e869a0c7"
source = "https://github.com/golang/time"
[[projects]]
branch = "master"
name = "gonum.org/v1/gonum"
packages = [
"blas",
"blas/blas64",
"blas/gonum",
"floats",
"internal/asm/c128",
"internal/asm/f32",
"internal/asm/f64",
"internal/math32",
"lapack",
"lapack/gonum",
"lapack/lapack64",
"mat"
]
revision = "69fc04c7c31754cf196d3bcc70c3bc0eec9da1b7"
[[projects]]
name = "gonum.org/v1/plot"
packages = [
".",
"palette",
"plotter",
"plotutil",
"tools/bezier",
"vg",
"vg/draw",
"vg/fonts",
"vg/vgeps",
"vg/vgimg",
"vg/vgpdf",
"vg/vgsvg"
]
revision = "feab214a240f4312b98ab52baf662b55ff1ee377"
source = "https://github.com/gonum/plot"
[[projects]]
name = "google.golang.org/api"
packages = [
@ -421,6 +395,6 @@
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "057a961c21585211116e57f99b60e5bdd28cfe46411715ec4e13dbcf12ddd876"
inputs-digest = "21abf25cf507cffab0266c5836fb8acc3cb14f96b0f3fba49e0c55b85fedf5e5"
solver-name = "gps-cdcl"
solver-version = 1

View File

@ -88,9 +88,9 @@
[[constraint]]
name = "github.com/gonum/plot"
name = "gonum.org/v1/plot"
source = "https://github.com/gonum/plot"
revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
revision = "feab214a240f4312b98ab52baf662b55ff1ee377"
[[constraint]]

View File

@ -19,11 +19,11 @@ import (
"github.com/coreos/dbtester/dbtesterpb"
"github.com/gonum/plot"
"github.com/gonum/plot/plotter"
"github.com/gonum/plot/plotutil"
"github.com/gonum/plot/vg"
"github.com/gyuho/dataframe"
"gonum.org/v1/plot"
"gonum.org/v1/plot/plotter"
"gonum.org/v1/plot/plotutil"
"gonum.org/v1/plot/vg"
)
var (

View File

@ -18,7 +18,7 @@ import (
"image/color"
"sort"
"github.com/gonum/plot/plotutil"
"gonum.org/v1/plot/plotutil"
)
// IsValidDatabaseID returns false if the database id is not supported.

View File

@ -1,155 +0,0 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"errors"
"fmt"
"math"
)
func newGeneral64(r, c int) general64 {
return general64{
data: make([]float64, r*c),
rows: r,
cols: c,
stride: c,
}
}
type general64 struct {
data []float64
rows, cols int
stride int
}
// adds element-wise into receiver. rows and columns must match
func (g general64) add(h general64) {
if debug {
if g.rows != h.rows {
panic("blas: row size mismatch")
}
if g.cols != h.cols {
panic("blas: col size mismatch")
}
}
for i := 0; i < g.rows; i++ {
gtmp := g.data[i*g.stride : i*g.stride+g.cols]
for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
gtmp[j] += v
}
}
}
// at returns the value at the ith row and jth column. For speed reasons, the
// rows and columns are not bounds checked.
func (g general64) at(i, j int) float64 {
if debug {
if i < 0 || i >= g.rows {
panic("blas: row out of bounds")
}
if j < 0 || j >= g.cols {
panic("blas: col out of bounds")
}
}
return g.data[i*g.stride+j]
}
func (g general64) check(c byte) error {
if g.rows < 0 {
return errors.New("blas: rows < 0")
}
if g.cols < 0 {
return errors.New("blas: cols < 0")
}
if g.stride < 1 {
return errors.New("blas: stride < 1")
}
if g.stride < g.cols {
return errors.New("blas: illegal stride")
}
if (g.rows-1)*g.stride+g.cols > len(g.data) {
return fmt.Errorf("blas: index of %c out of range", c)
}
return nil
}
func (g general64) clone() general64 {
data := make([]float64, len(g.data))
copy(data, g.data)
return general64{
data: data,
rows: g.rows,
cols: g.cols,
stride: g.stride,
}
}
// assumes they are the same size
func (g general64) copy(h general64) {
if debug {
if g.rows != h.rows {
panic("blas: row mismatch")
}
if g.cols != h.cols {
panic("blas: col mismatch")
}
}
for k := 0; k < g.rows; k++ {
copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
}
}
func (g general64) equal(a general64) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if a.data[i] != v {
return false
}
}
return true
}
/*
// print is to aid debugging. Commented out to avoid fmt import
func (g general64) print() {
fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
for i := 0; i < g.rows; i++ {
fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
}
}
*/
func (g general64) view(i, j, r, c int) general64 {
if debug {
if i < 0 || i+r > g.rows {
panic("blas: row out of bounds")
}
if j < 0 || j+c > g.cols {
panic("blas: col out of bounds")
}
}
return general64{
data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
rows: r,
cols: c,
stride: g.stride,
}
}
func (g general64) equalWithinAbs(a general64, tol float64) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if math.Abs(a.data[i]-v) > tol {
return false
}
}
return true
}

View File

@ -1,157 +0,0 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"errors"
"fmt"
math "github.com/gonum/blas/native/internal/math32"
)
func newGeneral32(r, c int) general32 {
return general32{
data: make([]float32, r*c),
rows: r,
cols: c,
stride: c,
}
}
type general32 struct {
data []float32
rows, cols int
stride int
}
// adds element-wise into receiver. rows and columns must match
func (g general32) add(h general32) {
if debug {
if g.rows != h.rows {
panic("blas: row size mismatch")
}
if g.cols != h.cols {
panic("blas: col size mismatch")
}
}
for i := 0; i < g.rows; i++ {
gtmp := g.data[i*g.stride : i*g.stride+g.cols]
for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
gtmp[j] += v
}
}
}
// at returns the value at the ith row and jth column. For speed reasons, the
// rows and columns are not bounds checked.
func (g general32) at(i, j int) float32 {
if debug {
if i < 0 || i >= g.rows {
panic("blas: row out of bounds")
}
if j < 0 || j >= g.cols {
panic("blas: col out of bounds")
}
}
return g.data[i*g.stride+j]
}
func (g general32) check(c byte) error {
if g.rows < 0 {
return errors.New("blas: rows < 0")
}
if g.cols < 0 {
return errors.New("blas: cols < 0")
}
if g.stride < 1 {
return errors.New("blas: stride < 1")
}
if g.stride < g.cols {
return errors.New("blas: illegal stride")
}
if (g.rows-1)*g.stride+g.cols > len(g.data) {
return fmt.Errorf("blas: index of %c out of range", c)
}
return nil
}
func (g general32) clone() general32 {
data := make([]float32, len(g.data))
copy(data, g.data)
return general32{
data: data,
rows: g.rows,
cols: g.cols,
stride: g.stride,
}
}
// assumes they are the same size
func (g general32) copy(h general32) {
if debug {
if g.rows != h.rows {
panic("blas: row mismatch")
}
if g.cols != h.cols {
panic("blas: col mismatch")
}
}
for k := 0; k < g.rows; k++ {
copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
}
}
func (g general32) equal(a general32) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if a.data[i] != v {
return false
}
}
return true
}
/*
// print is to aid debugging. Commented out to avoid fmt import
func (g general32) print() {
fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
for i := 0; i < g.rows; i++ {
fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
}
}
*/
func (g general32) view(i, j, r, c int) general32 {
if debug {
if i < 0 || i+r > g.rows {
panic("blas: row out of bounds")
}
if j < 0 || j+c > g.cols {
panic("blas: col out of bounds")
}
}
return general32{
data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
rows: r,
cols: c,
stride: g.stride,
}
}
func (g general32) equalWithinAbs(a general32, tol float32) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if math.Abs(a.data[i]-v) > tol {
return false
}
}
return true
}

View File

@ -1,33 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package f32
// DdotUnitary is
// for i, v := range x {
// sum += float64(y[i]) * float64(v)
// }
// return
func DdotUnitary(x, y []float32) (sum float64) {
for i, v := range x {
sum += float64(y[i]) * float64(v)
}
return
}
// DdotInc is
// for i := 0; i < int(n); i++ {
// sum += float64(y[iy]) * float64(x[ix])
// ix += incX
// iy += incY
// }
// return
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
for i := 0; i < int(n); i++ {
sum += float64(y[iy]) * float64(x[ix])
ix += incX
iy += incY
}
return
}

View File

@ -1,33 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package f32
// DotUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotUnitary(x, y []float32) (sum float32) {
for i, v := range x {
sum += y[i] * v
}
return sum
}
// DotInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
for i := 0; i < int(n); i++ {
sum += y[iy] * x[ix]
ix += incX
iy += incY
}
return sum
}

103
vendor/github.com/gonum/matrix/doc.go generated vendored
View File

@ -1,103 +0,0 @@
// Generated by running
// go generate github.com/gonum/matrix
// DO NOT EDIT.
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package matrix provides common error handling mechanisms for matrix operations
// in mat64 and cmat128.
//
// Overview
//
// This section provides a quick overview of the matrix package. The following
// sections provide more in depth commentary.
//
// matrix provides:
// - Error type definitions
// - Error recovery mechanisms
// - Common constants used by mat64 and cmat128
//
// Errors
//
// The mat64 and cmat128 matrix packages share a common set of errors
// provided by matrix via the matrix.Error type.
//
// Errors are either returned directly or used as the parameter of a panic
// depending on the class of error encountered. Returned errors indicate
// that a call was not able to complete successfully while panics generally
// indicate a programmer or unrecoverable error.
//
// Examples of each type are found in the mat64 Solve methods, which find
// x such that A*x = b.
//
// An error value is returned from the function or method when the operation
// can meaningfully fail. The Solve operation cannot complete if A is
// singular. However, determining the singularity of A is most easily
// discovered during the Solve procedure itself and is a valid result from
// the operation, so in this case an error is returned.
//
// A function will panic when the input parameters are inappropriate for
// the function. In Solve, for example, the number of rows of each input
// matrix must be equal because of the rules of matrix multiplication.
// Similarly, for solving A*x = b, a non-zero receiver must have the same
// number of rows as A has columns and must have the same number of columns
// as b. In all cases where a function will panic, conditions that would
// lead to a panic can easily be checked prior to a call.
//
// Error Recovery
//
// When a matrix.Error is the parameter of a panic, the panic can be
// recovered by a Maybe function, which will then return the error.
// Panics that are not of type matrix.Error are re-panicked by the
// Maybe functions.
//
// Invariants
//
// Matrix input arguments to functions are never directly modified. If an operation
// changes Matrix data, the mutated matrix will be the receiver of a function.
//
// For convenience, a matrix may be used as both a receiver and as an input, e.g.
// a.Pow(a, 6)
// v.SolveVec(a.T(), v)
// though in many cases this will cause an allocation (see Element Aliasing).
// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
//
// Element Aliasing
//
// Most methods in the matrix packages modify receiver data. It is forbidden for the modified
// data region of the receiver to overlap the used data area of the input
// arguments. The exception to this rule is when the method receiver is equal to one
// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
//
// This prohibition is to help avoid subtle mistakes when the method needs to read
// from and write to the same data region. There are ways to make mistakes using the
// matrix API, and matrix functions will detect and complain about those.
// There are many ways to make mistakes by excursion from the matrix API via
// interaction with raw matrix values.
//
// If you need to read the rest of this section to understand the behavior of
// your program, you are being clever. Don't be clever. If you must be clever,
// blas64/cblas128 and lapack64/clapack128 may be used to call the behavior directly.
//
// The matrix packages will use the following rules to detect overlap between the receiver and one
// of the inputs:
// - the input implements one of the Raw methods, and
// - the Raw type matches that of the receiver or
// one is a RawMatrixer and the other is a RawVectorer, and
// - the address ranges of the backing data slices overlap, and
// - the strides differ or there is an overlap in the used data elements.
// If such an overlap is detected, the method will panic.
//
// The following cases will not panic:
// - the data slices do not overlap,
// - there is pointer identity between the receiver and input values after
// the value has been untransposed if necessary.
//
// The matrix packages will not attempt to detect element overlap if the input does not implement a
// Raw method, or if the Raw method differs from that of the receiver except when a
// conversion has occurred through a matrix API function. Method behavior is undefined
// if there is undetected overlap.
//
package matrix

View File

@ -1,343 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build ignore
// gendoc creates the matrix, mat64 and cmat128 package doc comments.
package main
import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
"text/template"
"unicode/utf8"
)
var docs = template.Must(template.New("docs").Funcs(funcs).Parse(`{{define "common"}}// Generated by running
// go generate github.com/gonum/matrix
// DO NOT EDIT.
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package {{.Name}} provides {{.Provides}}
//
// Overview
//
// This section provides a quick overview of the {{.Name}} package. The following
// sections provide more in depth commentary.
//
{{.Overview}}
//{{end}}
{{define "interfaces"}}// The Matrix Interfaces
//
// The Matrix interface is the common link between the concrete types. The Matrix
// interface is defined by three functions: Dims, which returns the dimensions
// of the Matrix, At, which returns the element in the specified location, and
// T for returning a Transpose (discussed later). All of the concrete types can
// perform these behaviors and so implement the interface. Methods and functions
// are designed to use this interface, so in particular the method
// func (m *Dense) Mul(a, b Matrix)
// constructs a *Dense from the result of a multiplication with any Matrix types,
// not just *Dense. Where more restrictive requirements must be met, there are also the
// Symmetric and Triangular interfaces. For example, in
// func (s *SymDense) AddSym(a, b Symmetric)
// the Symmetric interface guarantees a symmetric result.
//
// Transposes
//
// The T method is used for transposition. For example, c.Mul(a.T(), b) computes
// c = a^T * b. The {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}} types implement this method using an implicit transpose —
// see the Transpose type for more details. Note that some operations have a
// transpose as part of their definition, as in *SymDense.SymOuterK.
//{{end}}
{{define "factorization"}}// Matrix Factorization
//
// Matrix factorizations, such as the LU decomposition, typically have their own
// specific data storage, and so are each implemented as a specific type. The
// factorization can be computed through a call to Factorize
// var lu {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}}.LU
// lu.Factorize(a)
// The elements of the factorization can be extracted through methods on the
// appropriate type, i.e. *TriDense.LFromLU and *TriDense.UFromLU. Alternatively,
// they can be used directly, as in *Dense.SolveLU. Some factorizations can be
// updated directly, without needing to update the original matrix and refactorize,
// as in *LU.RankOne.
//{{end}}
{{define "blas"}}// BLAS and LAPACK
//
// BLAS and LAPACK are the standard APIs for linear algebra routines. Many
// operations in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} are implemented using calls to the wrapper functions
// in gonum/blas/{{.BLAS|alts}} and gonum/lapack/{{.LAPACK|alts}}. By default, {{.BLAS|join "/"}} and
// {{.LAPACK|join "/"}} call the native Go implementations of the routines. Alternatively,
// it is possible to use C-based implementations of the APIs through the respective
// cgo packages and "Use" functions. The Go implementation of LAPACK makes calls
// through {{.BLAS|join "/"}}, so if a cgo BLAS implementation is registered, the {{.LAPACK|join "/"}}
// calls will be partially executed in Go and partially executed in C.
//{{end}}
{{define "switching"}}// Type Switching
//
// The Matrix abstraction enables efficiency as well as interoperability. Go's
// type reflection capabilities are used to choose the most efficient routine
// given the specific concrete types. For example, in
// c.Mul(a, b)
// if a and b both implement RawMatrixer, that is, they can be represented as a
// {{.BLAS|alts}}.General, {{.BLAS|alts}}.Gemm (general matrix multiplication) is called, while
// instead if b is a RawSymmetricer {{.BLAS|alts}}.Symm is used (general-symmetric
// multiplication), and if b is a *Vector {{.BLAS|alts}}.Gemv is used.
//
// There are many possible type combinations and special cases. No specific guarantees
// are made about the performance of any method, and in particular, note that an
// abstract matrix type may be copied into a concrete type of the corresponding
// value. If there are specific special cases that are needed, please submit a
// pull-request or file an issue.
//{{end}}
{{define "invariants"}}// Invariants
//
// Matrix input arguments to functions are never directly modified. If an operation
// changes Matrix data, the mutated matrix will be the receiver of a function.
//
// For convenience, a matrix may be used as both a receiver and as an input, e.g.
// a.Pow(a, 6)
// v.SolveVec(a.T(), v)
// though in many cases this will cause an allocation (see Element Aliasing).
// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
//{{end}}
{{define "aliasing"}}// Element Aliasing
//
// Most methods in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} modify receiver data. It is forbidden for the modified
// data region of the receiver to overlap the used data area of the input
// arguments. The exception to this rule is when the method receiver is equal to one
// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
//
// This prohibition is to help avoid subtle mistakes when the method needs to read
// from and write to the same data region. There are ways to make mistakes using the
// {{.Name}} API, and {{.Name}} functions will detect and complain about those.
// There are many ways to make mistakes by excursion from the {{.Name}} API via
// interaction with raw matrix values.
//
// If you need to read the rest of this section to understand the behavior of
// your program, you are being clever. Don't be clever. If you must be clever,
// {{.BLAS|join "/"}} and {{.LAPACK|join "/"}} may be used to call the behavior directly.
//
// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will use the following rules to detect overlap between the receiver and one
// of the inputs:
// - the input implements one of the Raw methods, and
// - the Raw type matches that of the receiver or
// one is a RawMatrixer and the other is a RawVectorer, and
// - the address ranges of the backing data slices overlap, and
// - the strides differ or there is an overlap in the used data elements.
// If such an overlap is detected, the method will panic.
//
// The following cases will not panic:
// - the data slices do not overlap,
// - there is pointer identity between the receiver and input values after
// the value has been untransposed if necessary.
//
// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will not attempt to detect element overlap if the input does not implement a
// Raw method, or if the Raw method differs from that of the receiver except when a
// conversion has occurred through a {{.Name}} API function. Method behavior is undefined
// if there is undetected overlap.
//{{end}}`))
type Package struct {
path string
Name string
Provides string
Description string
ExamplePackage string
Overview string
BLAS []string
LAPACK []string
template string
}
var pkgs = []Package{
{
path: ".",
Name: "matrix",
Description: "the matrix packages",
Provides: `common error handling mechanisms for matrix operations
// in mat64 and cmat128.`,
ExamplePackage: "mat64",
Overview: `// matrix provides:
// - Error type definitions
// - Error recovery mechanisms
// - Common constants used by mat64 and cmat128
//
// Errors
//
// The mat64 and cmat128 matrix packages share a common set of errors
// provided by matrix via the matrix.Error type.
//
// Errors are either returned directly or used as the parameter of a panic
// depending on the class of error encountered. Returned errors indicate
// that a call was not able to complete successfully while panics generally
// indicate a programmer or unrecoverable error.
//
// Examples of each type are found in the mat64 Solve methods, which find
// x such that A*x = b.
//
// An error value is returned from the function or method when the operation
// can meaningfully fail. The Solve operation cannot complete if A is
// singular. However, determining the singularity of A is most easily
// discovered during the Solve procedure itself and is a valid result from
// the operation, so in this case an error is returned.
//
// A function will panic when the input parameters are inappropriate for
// the function. In Solve, for example, the number of rows of each input
// matrix must be equal because of the rules of matrix multiplication.
// Similarly, for solving A*x = b, a non-zero receiver must have the same
// number of rows as A has columns and must have the same number of columns
// as b. In all cases where a function will panic, conditions that would
// lead to a panic can easily be checked prior to a call.
//
// Error Recovery
//
// When a matrix.Error is the parameter of a panic, the panic can be
// recovered by a Maybe function, which will then return the error.
// Panics that are not of type matrix.Error are re-panicked by the
// Maybe functions.`,
BLAS: []string{"blas64", "cblas128"},
LAPACK: []string{"lapack64", "clapack128"},
template: `{{template "common" .}}
{{template "invariants" .}}
{{template "aliasing" .}}
package {{.Name}}
`,
},
{
path: "mat64",
Name: "mat64",
Provides: `implementations of float64 matrix structures and
// linear algebra operations on them.`,
Overview: `// mat64 provides:
// - Interfaces for Matrix classes (Matrix, Symmetric, Triangular)
// - Concrete implementations (Dense, SymDense, TriDense)
// - Methods and functions for using matrix data (Add, Trace, SymRankOne)
// - Types for constructing and using matrix factorizations (QR, LU)
//
// A matrix may be constructed through the corresponding New function. If no
// backing array is provided the matrix will be initialized to all zeros.
// // Allocate a zeroed matrix of size 3×5
// zero := mat64.NewDense(3, 5, nil)
// If a backing data slice is provided, the matrix will have those elements.
// Matrices are all stored in row-major format.
// // Generate a 6×6 matrix of random values.
// data := make([]float64, 36)
// for i := range data {
// data[i] = rand.NormFloat64()
// }
// a := mat64.NewDense(6, 6, data)
//
// Operations involving matrix data are implemented as functions when the values
// of the matrix remain unchanged
// tr := mat64.Trace(a)
// and are implemented as methods when the operation modifies the receiver.
// zero.Copy(a)
//
// Receivers must be the correct size for the matrix operations, otherwise the
// operation will panic. As a special case for convenience, a zero-sized matrix
// will be modified to have the correct size, allocating data if necessary.
// var c mat64.Dense // construct a new zero-sized matrix
// c.Mul(a, a) // c is automatically adjusted to be 6×6`,
BLAS: []string{"blas64"},
LAPACK: []string{"lapack64"},
template: `{{template "common" .}}
{{template "interfaces" .}}
{{template "factorization" .}}
{{template "blas" .}}
{{template "switching" .}}
{{template "invariants" .}}
{{template "aliasing" .}}
package {{.Name}}
`,
},
{
path: "cmat128",
Name: "cmat128",
Provides: `implementations of complex128 matrix structures and
// linear algebra operations on them.`,
Overview: `// cmat128 provides:
// - Interfaces for a complex Matrix`,
BLAS: []string{"cblas128"},
LAPACK: []string{"clapack128"},
template: `{{template "common" . }}
{{template "blas" .}}
{{template "switching" .}}
{{template "invariants" .}}
{{template "aliasing" .}}
package {{.Name}}
`,
},
}
var funcs = template.FuncMap{
"sentence": sentence,
"alts": alts,
"join": join,
}
// sentence converts a string to sentence case where the string is the prefix of the sentence.
func sentence(s string) string {
if len(s) == 0 {
return ""
}
_, size := utf8.DecodeRune([]byte(s))
return strings.ToUpper(s[:size]) + s[size:]
}
// alts renders a []string as a glob alternatives list.
func alts(s []string) string {
switch len(s) {
case 0:
return ""
case 1:
return s[0]
default:
return fmt.Sprintf("{%s}", strings.Join(s, ","))
}
}
// join is strings.Join with the parameter order changed.
func join(sep string, s []string) string {
return strings.Join(s, sep)
}
func main() {
for _, pkg := range pkgs {
t, err := template.Must(docs.Clone()).Parse(pkg.template)
if err != nil {
log.Fatalf("failed to parse template: %v", err)
}
file := filepath.Join(pkg.path, "doc.go")
f, err := os.Create(file)
if err != nil {
log.Fatalf("failed to create %q: %v", file, err)
}
err = t.Execute(f, pkg)
if err != nil {
log.Fatalf("failed to execute template: %v", err)
}
f.Close()
}
}

View File

@ -1,7 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gendoc.go
package matrix

View File

@ -1,145 +0,0 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file must be kept in sync with index_no_bound_checks.go.
//+build bounds
package mat64
import "github.com/gonum/matrix"
// At returns the element at row i, column j.
func (m *Dense) At(i, j int) float64 {
return m.at(i, j)
}
func (m *Dense) at(i, j int) float64 {
if uint(i) >= uint(m.mat.Rows) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(m.mat.Cols) {
panic(matrix.ErrColAccess)
}
return m.mat.Data[i*m.mat.Stride+j]
}
// Set sets the element at row i, column j to the value v.
func (m *Dense) Set(i, j int, v float64) {
m.set(i, j, v)
}
func (m *Dense) set(i, j int, v float64) {
if uint(i) >= uint(m.mat.Rows) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(m.mat.Cols) {
panic(matrix.ErrColAccess)
}
m.mat.Data[i*m.mat.Stride+j] = v
}
// At returns the element at row i.
// It panics if i is out of bounds or if j is not zero.
func (v *Vector) At(i, j int) float64 {
if j != 0 {
panic(matrix.ErrColAccess)
}
return v.at(i)
}
func (v *Vector) at(i int) float64 {
if uint(i) >= uint(v.n) {
panic(matrix.ErrRowAccess)
}
return v.mat.Data[i*v.mat.Inc]
}
// SetVec sets the element at row i to the value val.
// It panics if i is out of bounds.
func (v *Vector) SetVec(i int, val float64) {
v.setVec(i, val)
}
func (v *Vector) setVec(i int, val float64) {
if uint(i) >= uint(v.n) {
panic(matrix.ErrVectorAccess)
}
v.mat.Data[i*v.mat.Inc] = val
}
// At returns the element at row i and column j.
func (t *SymDense) At(i, j int) float64 {
return t.at(i, j)
}
func (t *SymDense) at(i, j int) float64 {
if uint(i) >= uint(t.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(t.mat.N) {
panic(matrix.ErrColAccess)
}
if i > j {
i, j = j, i
}
return t.mat.Data[i*t.mat.Stride+j]
}
// SetSym sets the elements at (i,j) and (j,i) to the value v.
func (t *SymDense) SetSym(i, j int, v float64) {
t.set(i, j, v)
}
func (t *SymDense) set(i, j int, v float64) {
if uint(i) >= uint(t.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(t.mat.N) {
panic(matrix.ErrColAccess)
}
if i > j {
i, j = j, i
}
t.mat.Data[i*t.mat.Stride+j] = v
}
// At returns the element at row i, column j.
func (t *TriDense) At(i, j int) float64 {
return t.at(i, j)
}
func (t *TriDense) at(i, j int) float64 {
if uint(i) >= uint(t.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(t.mat.N) {
panic(matrix.ErrColAccess)
}
isUpper := t.isUpper()
if (isUpper && i > j) || (!isUpper && i < j) {
return 0
}
return t.mat.Data[i*t.mat.Stride+j]
}
// SetTri sets the element of the triangular matrix at row i, column j to the value v.
// It panics if the location is outside the appropriate half of the matrix.
func (t *TriDense) SetTri(i, j int, v float64) {
t.set(i, j, v)
}
func (t *TriDense) set(i, j int, v float64) {
if uint(i) >= uint(t.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(t.mat.N) {
panic(matrix.ErrColAccess)
}
isUpper := t.isUpper()
if (isUpper && i > j) || (!isUpper && i < j) {
panic(matrix.ErrTriangleSet)
}
t.mat.Data[i*t.mat.Stride+j] = v
}

View File

@ -1,145 +0,0 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file must be kept in sync with index_bound_checks.go.
//+build !bounds
package mat64
import "github.com/gonum/matrix"
// At returns the element at row i, column j.
func (m *Dense) At(i, j int) float64 {
if uint(i) >= uint(m.mat.Rows) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(m.mat.Cols) {
panic(matrix.ErrColAccess)
}
return m.at(i, j)
}
func (m *Dense) at(i, j int) float64 {
return m.mat.Data[i*m.mat.Stride+j]
}
// Set sets the element at row i, column j to the value v.
func (m *Dense) Set(i, j int, v float64) {
if uint(i) >= uint(m.mat.Rows) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(m.mat.Cols) {
panic(matrix.ErrColAccess)
}
m.set(i, j, v)
}
func (m *Dense) set(i, j int, v float64) {
m.mat.Data[i*m.mat.Stride+j] = v
}
// At returns the element at row i.
// It panics if i is out of bounds or if j is not zero.
func (v *Vector) At(i, j int) float64 {
if uint(i) >= uint(v.n) {
panic(matrix.ErrRowAccess)
}
if j != 0 {
panic(matrix.ErrColAccess)
}
return v.at(i)
}
func (v *Vector) at(i int) float64 {
return v.mat.Data[i*v.mat.Inc]
}
// SetVec sets the element at row i to the value val.
// It panics if i is out of bounds.
func (v *Vector) SetVec(i int, val float64) {
if uint(i) >= uint(v.n) {
panic(matrix.ErrVectorAccess)
}
v.setVec(i, val)
}
func (v *Vector) setVec(i int, val float64) {
v.mat.Data[i*v.mat.Inc] = val
}
// At returns the element at row i and column j.
func (s *SymDense) At(i, j int) float64 {
if uint(i) >= uint(s.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(s.mat.N) {
panic(matrix.ErrColAccess)
}
return s.at(i, j)
}
func (s *SymDense) at(i, j int) float64 {
if i > j {
i, j = j, i
}
return s.mat.Data[i*s.mat.Stride+j]
}
// SetSym sets the elements at (i,j) and (j,i) to the value v.
func (s *SymDense) SetSym(i, j int, v float64) {
if uint(i) >= uint(s.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(s.mat.N) {
panic(matrix.ErrColAccess)
}
s.set(i, j, v)
}
func (s *SymDense) set(i, j int, v float64) {
if i > j {
i, j = j, i
}
s.mat.Data[i*s.mat.Stride+j] = v
}
// At returns the element at row i, column j.
func (t *TriDense) At(i, j int) float64 {
if uint(i) >= uint(t.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(t.mat.N) {
panic(matrix.ErrColAccess)
}
return t.at(i, j)
}
func (t *TriDense) at(i, j int) float64 {
isUpper := t.triKind()
if (isUpper && i > j) || (!isUpper && i < j) {
return 0
}
return t.mat.Data[i*t.mat.Stride+j]
}
// SetTri sets the element at row i, column j to the value v.
// It panics if the location is outside the appropriate half of the matrix.
func (t *TriDense) SetTri(i, j int, v float64) {
if uint(i) >= uint(t.mat.N) {
panic(matrix.ErrRowAccess)
}
if uint(j) >= uint(t.mat.N) {
panic(matrix.ErrColAccess)
}
isUpper := t.isUpper()
if (isUpper && i > j) || (!isUpper && i < j) {
panic(matrix.ErrTriangleSet)
}
t.set(i, j, v)
}
func (t *TriDense) set(i, j int, v float64) {
t.mat.Data[i*t.mat.Stride+j] = v
}

View File

@ -1,103 +0,0 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mat64
import (
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f64"
"github.com/gonum/matrix"
)
// Inner computes the generalized inner product
// x^T A y
// between vectors x and y with matrix A. This is only a true inner product if
// A is symmetric positive definite, though the operation works for any matrix A.
//
// Inner panics if x.Len != m or y.Len != n when A is an m x n matrix.
func Inner(x *Vector, A Matrix, y *Vector) float64 {
m, n := A.Dims()
if x.Len() != m {
panic(matrix.ErrShape)
}
if y.Len() != n {
panic(matrix.ErrShape)
}
if m == 0 || n == 0 {
return 0
}
var sum float64
switch b := A.(type) {
case RawSymmetricer:
bmat := b.RawSymmetric()
if bmat.Uplo != blas.Upper {
// Panic as a string not a mat64.Error.
panic(badSymTriangle)
}
for i := 0; i < x.Len(); i++ {
xi := x.at(i)
if xi != 0 {
if y.mat.Inc == 1 {
sum += xi * f64.DotUnitary(
bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
y.mat.Data[i:],
)
} else {
sum += xi * f64.DotInc(
bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
y.mat.Data[i*y.mat.Inc:], uintptr(n-i),
1, uintptr(y.mat.Inc),
0, 0,
)
}
}
yi := y.at(i)
if i != n-1 && yi != 0 {
if x.mat.Inc == 1 {
sum += yi * f64.DotUnitary(
bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
x.mat.Data[i+1:],
)
} else {
sum += yi * f64.DotInc(
bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
x.mat.Data[(i+1)*x.mat.Inc:], uintptr(n-i-1),
1, uintptr(x.mat.Inc),
0, 0,
)
}
}
}
case RawMatrixer:
bmat := b.RawMatrix()
for i := 0; i < x.Len(); i++ {
xi := x.at(i)
if xi != 0 {
if y.mat.Inc == 1 {
sum += xi * f64.DotUnitary(
bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
y.mat.Data,
)
} else {
sum += xi * f64.DotInc(
bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
y.mat.Data, uintptr(n),
1, uintptr(y.mat.Inc),
0, 0,
)
}
}
}
default:
for i := 0; i < x.Len(); i++ {
xi := x.at(i)
for j := 0; j < y.Len(); j++ {
sum += xi * A.At(i, j) * y.at(j)
}
}
}
return sum
}

View File

@ -1,217 +0,0 @@
// Copyright ©2013 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mat64
import (
"math"
"github.com/gonum/blas"
"github.com/gonum/blas/blas64"
"github.com/gonum/lapack/lapack64"
"github.com/gonum/matrix"
)
// LQ is a type for creating and using the LQ factorization of a matrix.
type LQ struct {
lq *Dense
tau []float64
cond float64
}
func (lq *LQ) updateCond() {
// A = LQ, where Q is orthonormal. Orthonormal multiplications do not change
// the condition number. Thus, ||A|| = ||L|| ||Q|| = ||Q||.
m := lq.lq.mat.Rows
work := make([]float64, 3*m)
iwork := make([]int, m)
l := lq.lq.asTriDense(m, blas.NonUnit, blas.Lower)
v := lapack64.Trcon(matrix.CondNorm, l.mat, work, iwork)
lq.cond = 1 / v
}
// Factorize computes the LQ factorization of an m×n matrix a where n <= m. The LQ
// factorization always exists even if A is singular.
//
// The LQ decomposition is a factorization of the matrix A such that A = L * Q.
// The matrix Q is an orthonormal n×n matrix, and L is an m×n upper triangular matrix.
// L and Q can be extracted from the LFromLQ and QFromLQ methods on Dense.
func (lq *LQ) Factorize(a Matrix) {
m, n := a.Dims()
if m > n {
panic(matrix.ErrShape)
}
k := min(m, n)
if lq.lq == nil {
lq.lq = &Dense{}
}
lq.lq.Clone(a)
work := make([]float64, 1)
lq.tau = make([]float64, k)
lapack64.Gelqf(lq.lq.mat, lq.tau, work, -1)
work = make([]float64, int(work[0]))
lapack64.Gelqf(lq.lq.mat, lq.tau, work, len(work))
lq.updateCond()
}
// TODO(btracey): Add in the "Reduced" forms for extracting the m×m orthogonal
// and upper triangular matrices.
// LFromLQ extracts the m×n lower trapezoidal matrix from a LQ decomposition.
func (m *Dense) LFromLQ(lq *LQ) {
r, c := lq.lq.Dims()
m.reuseAs(r, c)
// Disguise the LQ as a lower triangular
t := &TriDense{
mat: blas64.Triangular{
N: r,
Stride: lq.lq.mat.Stride,
Data: lq.lq.mat.Data,
Uplo: blas.Lower,
Diag: blas.NonUnit,
},
cap: lq.lq.capCols,
}
m.Copy(t)
if r == c {
return
}
// Zero right of the triangular.
for i := 0; i < r; i++ {
zero(m.mat.Data[i*m.mat.Stride+r : i*m.mat.Stride+c])
}
}
// QFromLQ extracts the n×n orthonormal matrix Q from an LQ decomposition.
func (m *Dense) QFromLQ(lq *LQ) {
r, c := lq.lq.Dims()
m.reuseAs(c, c)
// Set Q = I.
for i := 0; i < c; i++ {
v := m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]
zero(v)
v[i] = 1
}
// Construct Q from the elementary reflectors.
h := blas64.General{
Rows: c,
Cols: c,
Stride: c,
Data: make([]float64, c*c),
}
qCopy := getWorkspace(c, c, false)
v := blas64.Vector{
Inc: 1,
Data: make([]float64, c),
}
for i := 0; i < r; i++ {
// Set h = I.
zero(h.Data)
for j := 0; j < len(h.Data); j += c + 1 {
h.Data[j] = 1
}
// Set the vector data as the elementary reflector.
for j := 0; j < i; j++ {
v.Data[j] = 0
}
v.Data[i] = 1
for j := i + 1; j < c; j++ {
v.Data[j] = lq.lq.mat.Data[i*lq.lq.mat.Stride+j]
}
// Compute the multiplication matrix.
blas64.Ger(-lq.tau[i], v, v, h)
qCopy.Copy(m)
blas64.Gemm(blas.NoTrans, blas.NoTrans,
1, h, qCopy.mat,
0, m.mat)
}
}
// SolveLQ finds a minimum-norm solution to a system of linear equations defined
// by the matrices A and b, where A is an m×n matrix represented in its LQ factorized
// form. If A is singular or near-singular a Condition error is returned. Please
// see the documentation for Condition for more information.
//
// The minimization problem solved depends on the input parameters.
// If trans == false, find the minimum norm solution of A * X = b.
// If trans == true, find X such that ||A*X - b||_2 is minimized.
// The solution matrix, X, is stored in place into the receiver.
func (m *Dense) SolveLQ(lq *LQ, trans bool, b Matrix) error {
r, c := lq.lq.Dims()
br, bc := b.Dims()
// The LQ solve algorithm stores the result in-place into the right hand side.
// The storage for the answer must be large enough to hold both b and x.
// However, this method's receiver must be the size of x. Copy b, and then
// copy the result into m at the end.
if trans {
if c != br {
panic(matrix.ErrShape)
}
m.reuseAs(r, bc)
} else {
if r != br {
panic(matrix.ErrShape)
}
m.reuseAs(c, bc)
}
// Do not need to worry about overlap between m and b because x has its own
// independent storage.
x := getWorkspace(max(r, c), bc, false)
x.Copy(b)
t := lq.lq.asTriDense(lq.lq.mat.Rows, blas.NonUnit, blas.Lower).mat
if trans {
work := make([]float64, 1)
lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, -1)
work = make([]float64, int(work[0]))
lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, len(work))
ok := lapack64.Trtrs(blas.Trans, t, x.mat)
if !ok {
return matrix.Condition(math.Inf(1))
}
} else {
ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
if !ok {
return matrix.Condition(math.Inf(1))
}
for i := r; i < c; i++ {
zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
}
work := make([]float64, 1)
lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, -1)
work = make([]float64, int(work[0]))
lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, len(work))
}
// M was set above to be the correct size for the result.
m.Copy(x)
putWorkspace(x)
if lq.cond > matrix.ConditionTolerance {
return matrix.Condition(lq.cond)
}
return nil
}
// SolveLQVec finds a minimum-norm solution to a system of linear equations.
// Please see Dense.SolveLQ for the full documentation.
func (v *Vector) SolveLQVec(lq *LQ, trans bool, b *Vector) error {
if v != b {
v.checkOverlap(b.mat)
}
r, c := lq.lq.Dims()
// The Solve implementation is non-trivial, so rather than duplicate the code,
// instead recast the Vectors as Dense and call the matrix code.
if trans {
v.reuseAs(r)
} else {
v.reuseAs(c)
}
return v.asDense().SolveLQ(lq, trans, b.asDense())
}

View File

@ -1,183 +0,0 @@
// Copyright ©2013 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mat64
import (
"math"
"github.com/gonum/blas"
"github.com/gonum/blas/blas64"
"github.com/gonum/lapack/lapack64"
"github.com/gonum/matrix"
)
// QR is a type for creating and using the QR factorization of a matrix.
type QR struct {
qr *Dense
tau []float64
cond float64
}
func (qr *QR) updateCond() {
// A = QR, where Q is orthonormal. Orthonormal multiplications do not change
// the condition number. Thus, ||A|| = ||Q|| ||R|| = ||R||.
n := qr.qr.mat.Cols
work := make([]float64, 3*n)
iwork := make([]int, n)
r := qr.qr.asTriDense(n, blas.NonUnit, blas.Upper)
v := lapack64.Trcon(matrix.CondNorm, r.mat, work, iwork)
qr.cond = 1 / v
}
// Factorize computes the QR factorization of an m×n matrix a where m >= n. The QR
// factorization always exists even if A is singular.
//
// The QR decomposition is a factorization of the matrix A such that A = Q * R.
// The matrix Q is an orthonormal m×m matrix, and R is an m×n upper triangular matrix.
// Q and R can be extracted from the QFromQR and RFromQR methods on Dense.
func (qr *QR) Factorize(a Matrix) {
m, n := a.Dims()
if m < n {
panic(matrix.ErrShape)
}
k := min(m, n)
if qr.qr == nil {
qr.qr = &Dense{}
}
qr.qr.Clone(a)
work := make([]float64, 1)
qr.tau = make([]float64, k)
lapack64.Geqrf(qr.qr.mat, qr.tau, work, -1)
work = make([]float64, int(work[0]))
lapack64.Geqrf(qr.qr.mat, qr.tau, work, len(work))
qr.updateCond()
}
// TODO(btracey): Add in the "Reduced" forms for extracting the n×n orthogonal
// and upper triangular matrices.
// RFromQR extracts the m×n upper trapezoidal matrix from a QR decomposition.
func (m *Dense) RFromQR(qr *QR) {
r, c := qr.qr.Dims()
m.reuseAs(r, c)
// Disguise the QR as an upper triangular
t := &TriDense{
mat: blas64.Triangular{
N: c,
Stride: qr.qr.mat.Stride,
Data: qr.qr.mat.Data,
Uplo: blas.Upper,
Diag: blas.NonUnit,
},
cap: qr.qr.capCols,
}
m.Copy(t)
// Zero below the triangular.
for i := r; i < c; i++ {
zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c])
}
}
// QFromQR extracts the m×m orthonormal matrix Q from a QR decomposition.
func (m *Dense) QFromQR(qr *QR) {
r, _ := qr.qr.Dims()
m.reuseAsZeroed(r, r)
// Set Q = I.
for i := 0; i < r*r; i += r + 1 {
m.mat.Data[i] = 1
}
// Construct Q from the elementary reflectors.
work := make([]float64, 1)
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, -1)
work = make([]float64, int(work[0]))
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, len(work))
}
// SolveQR finds a minimum-norm solution to a system of linear equations defined
// by the matrices A and b, where A is an m×n matrix represented in its QR factorized
// form. If A is singular or near-singular a Condition error is returned. Please
// see the documentation for Condition for more information.
//
// The minimization problem solved depends on the input parameters.
// If trans == false, find X such that ||A*X - b||_2 is minimized.
// If trans == true, find the minimum norm solution of A^T * X = b.
// The solution matrix, X, is stored in place into the receiver.
func (m *Dense) SolveQR(qr *QR, trans bool, b Matrix) error {
r, c := qr.qr.Dims()
br, bc := b.Dims()
// The QR solve algorithm stores the result in-place into the right hand side.
// The storage for the answer must be large enough to hold both b and x.
// However, this method's receiver must be the size of x. Copy b, and then
// copy the result into m at the end.
if trans {
if c != br {
panic(matrix.ErrShape)
}
m.reuseAs(r, bc)
} else {
if r != br {
panic(matrix.ErrShape)
}
m.reuseAs(c, bc)
}
// Do not need to worry about overlap between m and b because x has its own
// independent storage.
x := getWorkspace(max(r, c), bc, false)
x.Copy(b)
t := qr.qr.asTriDense(qr.qr.mat.Cols, blas.NonUnit, blas.Upper).mat
if trans {
ok := lapack64.Trtrs(blas.Trans, t, x.mat)
if !ok {
return matrix.Condition(math.Inf(1))
}
for i := c; i < r; i++ {
zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
}
work := make([]float64, 1)
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, -1)
work = make([]float64, int(work[0]))
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, len(work))
} else {
work := make([]float64, 1)
lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, -1)
work = make([]float64, int(work[0]))
lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, len(work))
ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
if !ok {
return matrix.Condition(math.Inf(1))
}
}
// M was set above to be the correct size for the result.
m.Copy(x)
putWorkspace(x)
if qr.cond > matrix.ConditionTolerance {
return matrix.Condition(qr.cond)
}
return nil
}
// SolveQRVec finds a minimum-norm solution to a system of linear equations.
// Please see Dense.SolveQR for the full documentation.
func (v *Vector) SolveQRVec(qr *QR, trans bool, b *Vector) error {
if v != b {
v.checkOverlap(b.mat)
}
r, c := qr.qr.Dims()
// The Solve implementation is non-trivial, so rather than duplicate the code,
// instead recast the Vectors as Dense and call the matrix code.
if trans {
v.reuseAs(r)
} else {
v.reuseAs(c)
}
return v.asDense().SolveQR(qr, trans, b.asDense())
}

View File

@ -1,281 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mat64
import (
"github.com/gonum/blas"
"github.com/gonum/blas/blas64"
)
const (
// regionOverlap is the panic string used for the general case
// of a matrix region overlap between a source and destination.
regionOverlap = "mat64: bad region: overlap"
// regionIdentity is the panic string used for the specific
// case of complete agreement between a source and a destination.
regionIdentity = "mat64: bad region: identical"
// mismatchedStrides is the panic string used for overlapping
// data slices with differing strides.
mismatchedStrides = "mat64: bad region: different strides"
)
// checkOverlap returns false if the receiver does not overlap data elements
// referenced by the parameter and panics otherwise.
//
// checkOverlap methods return a boolean to allow the check call to be added to a
// boolean expression, making use of short-circuit operators.
func (m *Dense) checkOverlap(a blas64.General) bool {
mat := m.RawMatrix()
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
return false
}
off := offset(mat.Data[:1], a.Data[:1])
if off == 0 {
// At least one element overlaps.
if mat.Cols == a.Cols && mat.Rows == a.Rows && mat.Stride == a.Stride {
panic(regionIdentity)
}
panic(regionOverlap)
}
if off > 0 && len(mat.Data) <= off {
// We know m is completely before a.
return false
}
if off < 0 && len(a.Data) <= -off {
// We know m is completely after a.
return false
}
if mat.Stride != a.Stride {
// Too hard, so assume the worst.
panic(mismatchedStrides)
}
if off < 0 {
off = -off
mat.Cols, a.Cols = a.Cols, mat.Cols
}
if rectanglesOverlap(off, mat.Cols, a.Cols, mat.Stride) {
panic(regionOverlap)
}
return false
}
func (s *SymDense) checkOverlap(a blas64.Symmetric) bool {
mat := s.RawSymmetric()
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
return false
}
off := offset(mat.Data[:1], a.Data[:1])
if off == 0 {
// At least one element overlaps.
if mat.N == a.N && mat.Stride == a.Stride {
panic(regionIdentity)
}
panic(regionOverlap)
}
if off > 0 && len(mat.Data) <= off {
// We know s is completely before a.
return false
}
if off < 0 && len(a.Data) <= -off {
// We know s is completely after a.
return false
}
if mat.Stride != a.Stride {
// Too hard, so assume the worst.
panic(mismatchedStrides)
}
if off < 0 {
off = -off
mat.N, a.N = a.N, mat.N
// If we created the matrix it will always
// be in the upper triangle, but don't trust
// that this is the case.
mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
}
if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
panic(regionOverlap)
}
return false
}
func (t *TriDense) checkOverlap(a blas64.Triangular) bool {
mat := t.RawTriangular()
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
return false
}
off := offset(mat.Data[:1], a.Data[:1])
if off == 0 {
// At least one element overlaps.
if mat.N == a.N && mat.Stride == a.Stride {
panic(regionIdentity)
}
panic(regionOverlap)
}
if off > 0 && len(mat.Data) <= off {
// We know t is completely before a.
return false
}
if off < 0 && len(a.Data) <= -off {
// We know t is completely after a.
return false
}
if mat.Stride != a.Stride {
// Too hard, so assume the worst.
panic(mismatchedStrides)
}
if off < 0 {
off = -off
mat.N, a.N = a.N, mat.N
mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
}
if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
panic(regionOverlap)
}
return false
}
func (v *Vector) checkOverlap(a blas64.Vector) bool {
mat := v.mat
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
return false
}
off := offset(mat.Data[:1], a.Data[:1])
if off == 0 {
// At least one element overlaps.
if mat.Inc == a.Inc && len(mat.Data) == len(a.Data) {
panic(regionIdentity)
}
panic(regionOverlap)
}
if off > 0 && len(mat.Data) <= off {
// We know v is completely before a.
return false
}
if off < 0 && len(a.Data) <= -off {
// We know v is completely after a.
return false
}
if mat.Inc != a.Inc {
// Too hard, so assume the worst.
panic(mismatchedStrides)
}
if mat.Inc == 1 || off&mat.Inc == 0 {
panic(regionOverlap)
}
return false
}
// rectanglesOverlap returns whether the strided rectangles a and b overlap
// when b is offset by off elements after a but has at least one element before
// the end of a. off must be positive. a and b have aCols and bCols respectively.
//
// rectanglesOverlap works by shifting both matrices left such that the left
// column of a is at 0. The column indexes are flattened by obtaining the shifted
// relative left and right column positions modulo the common stride. This allows
// direct comparison of the column offsets when the matrix backing data slices
// are known to overlap.
func rectanglesOverlap(off, aCols, bCols, stride int) bool {
if stride == 1 {
// Unit stride means overlapping data
// slices must overlap as matrices.
return true
}
// Flatten the shifted matrix column positions
// so a starts at 0, modulo the common stride.
const aFrom = 0
aTo := aCols
// The mod stride operations here make the from
// and to indexes comparable between a and b when
// the data slices of a and b overlap.
bFrom := off % stride
bTo := (bFrom + bCols) % stride
if bTo == 0 || bFrom < bTo {
// b matrix is not wrapped: compare for
// simple overlap.
return bFrom < aTo
}
// b strictly wraps and so must overlap with a.
return true
}
// trianglesOverlap returns whether the strided triangles a and b overlap
// when b is offset by off elements after a but has at least one element before
// the end of a. off must be positive. a and b are aSize×aSize and bSize×bSize
// respectively.
func trianglesOverlap(off, aSize, bSize, stride int, aUpper, bUpper bool) bool {
if !rectanglesOverlap(off, aSize, bSize, stride) {
// Fast return if bounding rectangles do not overlap.
return false
}
// Find location of b relative to a.
rowOffset := off / stride
colOffset := off % stride
if (off+bSize)%stride < colOffset {
// We have wrapped, so readjust offsets.
rowOffset++
colOffset -= stride
}
if aUpper {
// Check whether the upper left of b
// is in the triangle of a
if rowOffset >= 0 && rowOffset <= colOffset {
return true
}
// Check whether the upper right of b
// is in the triangle of a.
return bUpper && rowOffset < colOffset+bSize
}
// Check whether the upper left of b
// is in the triangle of a
if colOffset >= 0 && rowOffset >= colOffset {
return true
}
if bUpper {
// Check whether the upper right corner of b
// is in a or the upper row of b spans a row
// of a.
return rowOffset > colOffset+bSize || colOffset < 0
}
if colOffset < 0 {
// Check whether the lower left of a
// is in the triangle of b or below
// the diagonal of a. This requires a
// swap of reference origin.
return -rowOffset+aSize > -colOffset
}
// Check whether the lower left of b
// is in the triangle of a or below
// the diagonal of a.
return rowOffset+bSize > colOffset
}

View File

@ -1,468 +0,0 @@
// Copyright ©2013 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mat64
import (
"github.com/gonum/blas"
"github.com/gonum/blas/blas64"
"github.com/gonum/internal/asm/f64"
"github.com/gonum/matrix"
)
var (
vector *Vector
_ Matrix = vector
_ Reseter = vector
)
// Vector represents a column vector.
type Vector struct {
mat blas64.Vector
n int
// A BLAS vector can have a negative increment, but allowing this
// in the mat64 type complicates a lot of code, and doesn't gain anything.
// Vector must have positive increment in this package.
}
// NewVector creates a new Vector of length n. If data == nil,
// a new slice is allocated for the backing slice. If len(data) == n, data is
// used as the backing slice, and changes to the elements of the returned Vector
// will be reflected in data. If neither of these is true, NewVector will panic.
func NewVector(n int, data []float64) *Vector {
if len(data) != n && data != nil {
panic(matrix.ErrShape)
}
if data == nil {
data = make([]float64, n)
}
return &Vector{
mat: blas64.Vector{
Inc: 1,
Data: data,
},
n: n,
}
}
// ViewVec returns a sub-vector view of the receiver starting at element i and
// extending n rows. If i is out of range, n is zero, or the view extends
// beyond the bounds of the Vector, ViewVec will panic with ErrIndexOutOfRange.
// The returned Vector retains reference to the underlying vector.
//
// ViewVec is deprecated and should not be used. It will be removed at a later date.
func (v *Vector) ViewVec(i, n int) *Vector {
return v.SliceVec(i, i+n)
}
// SliceVec returns a new Vector that shares backing data with the receiver.
// The returned matrix starts at i of the recevier and extends k-i elements.
// SliceVec panics with ErrIndexOutOfRange if the slice is outside the bounds
// of the receiver.
func (v *Vector) SliceVec(i, k int) *Vector {
if i < 0 || k <= i || v.n < k {
panic(matrix.ErrIndexOutOfRange)
}
return &Vector{
n: k - i,
mat: blas64.Vector{
Inc: v.mat.Inc,
Data: v.mat.Data[i*v.mat.Inc : (k-1)*v.mat.Inc+1],
},
}
}
func (v *Vector) Dims() (r, c int) {
if v.isZero() {
return 0, 0
}
return v.n, 1
}
// Len returns the length of the vector.
func (v *Vector) Len() int {
return v.n
}
// T performs an implicit transpose by returning the receiver inside a Transpose.
func (v *Vector) T() Matrix {
return Transpose{v}
}
// Reset zeros the length of the vector so that it can be reused as the
// receiver of a dimensionally restricted operation.
//
// See the Reseter interface for more information.
func (v *Vector) Reset() {
// No change of Inc or n to 0 may be
// made unless both are set to 0.
v.mat.Inc = 0
v.n = 0
v.mat.Data = v.mat.Data[:0]
}
// CloneVec makes a copy of a into the receiver, overwriting the previous value
// of the receiver.
func (v *Vector) CloneVec(a *Vector) {
if v == a {
return
}
v.n = a.n
v.mat = blas64.Vector{
Inc: 1,
Data: use(v.mat.Data, v.n),
}
blas64.Copy(v.n, a.mat, v.mat)
}
func (v *Vector) RawVector() blas64.Vector {
return v.mat
}
// CopyVec makes a copy of elements of a into the receiver. It is similar to the
// built-in copy; it copies as much as the overlap between the two vectors and
// returns the number of elements it copied.
func (v *Vector) CopyVec(a *Vector) int {
n := min(v.Len(), a.Len())
if v != a {
blas64.Copy(n, a.mat, v.mat)
}
return n
}
// ScaleVec scales the vector a by alpha, placing the result in the receiver.
func (v *Vector) ScaleVec(alpha float64, a *Vector) {
n := a.Len()
if v != a {
v.reuseAs(n)
if v.mat.Inc == 1 && a.mat.Inc == 1 {
f64.ScalUnitaryTo(v.mat.Data, alpha, a.mat.Data)
return
}
f64.ScalIncTo(v.mat.Data, uintptr(v.mat.Inc),
alpha, a.mat.Data, uintptr(n), uintptr(a.mat.Inc))
return
}
if v.mat.Inc == 1 {
f64.ScalUnitary(alpha, v.mat.Data)
return
}
f64.ScalInc(alpha, v.mat.Data, uintptr(n), uintptr(v.mat.Inc))
}
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver.
func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) {
if alpha == 1 {
v.AddVec(a, b)
return
}
if alpha == -1 {
v.SubVec(a, b)
return
}
ar := a.Len()
br := b.Len()
if ar != br {
panic(matrix.ErrShape)
}
if v != a {
v.checkOverlap(a.mat)
}
if v != b {
v.checkOverlap(b.mat)
}
v.reuseAs(ar)
switch {
case alpha == 0: // v <- a
v.CopyVec(a)
case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v
blas64.Scal(ar, alpha+1, v.mat)
case v == a && v != b: // v <- v + alpha * b
if v.mat.Inc == 1 && b.mat.Inc == 1 {
// Fast path for a common case.
f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
} else {
f64.AxpyInc(alpha, b.mat.Data, v.mat.Data,
uintptr(ar), uintptr(b.mat.Inc), uintptr(v.mat.Inc), 0, 0)
}
default: // v <- a + alpha * b or v <- a + alpha * v
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
// Fast path for a common case.
f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
} else {
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
alpha, b.mat.Data, a.mat.Data,
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
}
}
}
// AddVec adds the vectors a and b, placing the result in the receiver.
func (v *Vector) AddVec(a, b *Vector) {
ar := a.Len()
br := b.Len()
if ar != br {
panic(matrix.ErrShape)
}
if v != a {
v.checkOverlap(a.mat)
}
if v != b {
v.checkOverlap(b.mat)
}
v.reuseAs(ar)
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
// Fast path for a common case.
f64.AxpyUnitaryTo(v.mat.Data, 1, b.mat.Data, a.mat.Data)
return
}
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
1, b.mat.Data, a.mat.Data,
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
}
// SubVec subtracts the vector b from a, placing the result in the receiver.
func (v *Vector) SubVec(a, b *Vector) {
ar := a.Len()
br := b.Len()
if ar != br {
panic(matrix.ErrShape)
}
if v != a {
v.checkOverlap(a.mat)
}
if v != b {
v.checkOverlap(b.mat)
}
v.reuseAs(ar)
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
// Fast path for a common case.
f64.AxpyUnitaryTo(v.mat.Data, -1, b.mat.Data, a.mat.Data)
return
}
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
-1, b.mat.Data, a.mat.Data,
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
}
// MulElemVec performs element-wise multiplication of a and b, placing the result
// in the receiver.
func (v *Vector) MulElemVec(a, b *Vector) {
ar := a.Len()
br := b.Len()
if ar != br {
panic(matrix.ErrShape)
}
if v != a {
v.checkOverlap(a.mat)
}
if v != b {
v.checkOverlap(b.mat)
}
v.reuseAs(ar)
amat, bmat := a.RawVector(), b.RawVector()
for i := 0; i < v.n; i++ {
v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] * bmat.Data[i*bmat.Inc]
}
}
// DivElemVec performs element-wise division of a by b, placing the result
// in the receiver.
func (v *Vector) DivElemVec(a, b *Vector) {
ar := a.Len()
br := b.Len()
if ar != br {
panic(matrix.ErrShape)
}
if v != a {
v.checkOverlap(a.mat)
}
if v != b {
v.checkOverlap(b.mat)
}
v.reuseAs(ar)
amat, bmat := a.RawVector(), b.RawVector()
for i := 0; i < v.n; i++ {
v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] / bmat.Data[i*bmat.Inc]
}
}
// MulVec computes a * b. The result is stored into the receiver.
// MulVec panics if the number of columns in a does not equal the number of rows in b.
func (v *Vector) MulVec(a Matrix, b *Vector) {
r, c := a.Dims()
br := b.Len()
if c != br {
panic(matrix.ErrShape)
}
if v != b {
v.checkOverlap(b.mat)
}
a, trans := untranspose(a)
ar, ac := a.Dims()
v.reuseAs(r)
var restore func()
if v == a {
v, restore = v.isolatedWorkspace(a.(*Vector))
defer restore()
} else if v == b {
v, restore = v.isolatedWorkspace(b)
defer restore()
}
switch a := a.(type) {
case *Vector:
if v != a {
v.checkOverlap(a.mat)
}
if a.Len() == 1 {
// {1,1} x {1,n}
av := a.At(0, 0)
for i := 0; i < b.Len(); i++ {
v.mat.Data[i*v.mat.Inc] = av * b.mat.Data[i*b.mat.Inc]
}
return
}
if b.Len() == 1 {
// {1,n} x {1,1}
bv := b.At(0, 0)
for i := 0; i < a.Len(); i++ {
v.mat.Data[i*v.mat.Inc] = bv * a.mat.Data[i*a.mat.Inc]
}
return
}
// {n,1} x {1,n}
var sum float64
for i := 0; i < c; i++ {
sum += a.At(i, 0) * b.At(i, 0)
}
v.SetVec(0, sum)
return
case RawSymmetricer:
amat := a.RawSymmetric()
blas64.Symv(1, amat, b.mat, 0, v.mat)
case RawTriangular:
v.CopyVec(b)
amat := a.RawTriangular()
ta := blas.NoTrans
if trans {
ta = blas.Trans
}
blas64.Trmv(ta, amat, v.mat)
case RawMatrixer:
amat := a.RawMatrix()
// We don't know that a is a *Dense, so make
// a temporary Dense to check overlap.
(&Dense{mat: amat}).checkOverlap(v.asGeneral())
t := blas.NoTrans
if trans {
t = blas.Trans
}
blas64.Gemv(t, 1, amat, b.mat, 0, v.mat)
default:
if trans {
col := make([]float64, ar)
for c := 0; c < ac; c++ {
for i := range col {
col[i] = a.At(i, c)
}
var f float64
for i, e := range col {
f += e * b.mat.Data[i*b.mat.Inc]
}
v.mat.Data[c*v.mat.Inc] = f
}
} else {
row := make([]float64, ac)
for r := 0; r < ar; r++ {
for i := range row {
row[i] = a.At(r, i)
}
var f float64
for i, e := range row {
f += e * b.mat.Data[i*b.mat.Inc]
}
v.mat.Data[r*v.mat.Inc] = f
}
}
}
}
// reuseAs resizes an empty vector to a r×1 vector,
// or checks that a non-empty matrix is r×1.
func (v *Vector) reuseAs(r int) {
if v.isZero() {
v.mat = blas64.Vector{
Inc: 1,
Data: use(v.mat.Data, r),
}
v.n = r
return
}
if r != v.n {
panic(matrix.ErrShape)
}
}
func (v *Vector) isZero() bool {
// It must be the case that v.Dims() returns
// zeros in this case. See comment in Reset().
return v.mat.Inc == 0
}
func (v *Vector) isolatedWorkspace(a *Vector) (n *Vector, restore func()) {
l := a.Len()
n = getWorkspaceVec(l, false)
return n, func() {
v.CopyVec(n)
putWorkspaceVec(n)
}
}
// asDense returns a Dense representation of the receiver with the same
// underlying data.
func (v *Vector) asDense() *Dense {
return &Dense{
mat: v.asGeneral(),
capRows: v.n,
capCols: 1,
}
}
// asGeneral returns a blas64.General representation of the receiver with the
// same underlying data.
func (v *Vector) asGeneral() blas64.General {
return blas64.General{
Rows: v.n,
Cols: 1,
Stride: v.mat.Inc,
Data: v.mat.Data,
}
}

View File

@ -1,118 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package plotter
import (
"errors"
"image/color"
"math"
"github.com/gonum/plot"
"github.com/gonum/plot/vg"
"github.com/gonum/plot/vg/draw"
)
// Bubbles implements the Plotter interface, drawing
// a bubble plot of x, y, z triples where the z value
// determines the radius of the bubble.
type Bubbles struct {
XYZs
// Color is the color of the bubbles.
color.Color
// MinRadius and MaxRadius give the minimum
// and maximum bubble radius respectively.
// The radii of each bubble is interpolated linearly
// between these two values.
MinRadius, MaxRadius vg.Length
// MinZ and MaxZ are the minimum and
// maximum Z values from the data.
MinZ, MaxZ float64
}
// NewBubbles creates as new bubble plot plotter for
// the given data, with a minimum and maximum
// bubble radius.
func NewBubbles(xyz XYZer, min, max vg.Length) (*Bubbles, error) {
cpy, err := CopyXYZs(xyz)
if err != nil {
return nil, err
}
if min > max {
return nil, errors.New("Min bubble radius is greater than the max radius")
}
minz := cpy[0].Z
maxz := cpy[0].Z
for _, d := range cpy {
minz = math.Min(minz, d.Z)
maxz = math.Max(maxz, d.Z)
}
return &Bubbles{
XYZs: cpy,
MinRadius: min,
MaxRadius: max,
MinZ: minz,
MaxZ: maxz,
}, nil
}
// Plot implements the Plot method of the plot.Plotter interface.
func (bs *Bubbles) Plot(c draw.Canvas, plt *plot.Plot) {
trX, trY := plt.Transforms(&c)
c.SetColor(bs.Color)
for _, d := range bs.XYZs {
x := trX(d.X)
y := trY(d.Y)
pt := vg.Point{X: x, Y: y}
if !c.Contains(pt) {
continue
}
rad := bs.radius(d.Z)
// draw a circle centered at x, y
var p vg.Path
p.Move(vg.Point{X: x + rad, Y: y})
p.Arc(pt, rad, 0, 2*math.Pi)
p.Close()
c.Fill(p)
}
}
// radius returns the radius of a bubble by linear interpolation.
func (bs *Bubbles) radius(z float64) vg.Length {
rng := bs.MaxRadius - bs.MinRadius
if bs.MaxZ == bs.MinZ {
return rng/2 + bs.MinRadius
}
d := (z - bs.MinZ) / (bs.MaxZ - bs.MinZ)
return vg.Length(d)*rng + bs.MinRadius
}
// DataRange implements the DataRange method
// of the plot.DataRanger interface.
func (bs *Bubbles) DataRange() (xmin, xmax, ymin, ymax float64) {
return XYRange(XYValues{bs.XYZs})
}
// GlyphBoxes implements the GlyphBoxes method
// of the plot.GlyphBoxer interface.
func (bs *Bubbles) GlyphBoxes(plt *plot.Plot) []plot.GlyphBox {
boxes := make([]plot.GlyphBox, len(bs.XYZs))
for i, d := range bs.XYZs {
boxes[i].X = plt.X.Norm(d.X)
boxes[i].Y = plt.Y.Norm(d.Y)
r := bs.radius(d.Z)
boxes[i].Rectangle = vg.Rectangle{
Min: vg.Point{X: -r, Y: -r},
Max: vg.Point{X: +r, Y: +r},
}
}
return boxes
}

23
vendor/gonum.org/v1/gonum/LICENSE generated vendored Normal file
View File

@ -0,0 +1,23 @@
Copyright ©2013 The Gonum Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the gonum project nor the names of its authors and
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,110 +1,9 @@
// Copyright ©2013 The gonum Authors. All rights reserved.
// Copyright ©2013 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package blas provides interfaces for the BLAS linear algebra standard.
//go:generate ./conversions.bash
All methods must perform appropriate parameter checking and panic if
provided parameters that do not conform to the requirements specified
by the BLAS standard.
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
This version is modified to remove the "order" option. All matrix operations are
on row-order matrices.
Level 1 BLAS
dim scalar vector vector scalars 5-element prefixes
struct
_rotg ( a, b ) S, D
_rotmg( d1, d2, a, b ) S, D
_rot ( n, x, incX, y, incY, c, s ) S, D
_rotm ( n, x, incX, y, incY, param ) S, D
_swap ( n, x, incX, y, incY ) S, D, C, Z
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
_copy ( n, x, incX, y, incY ) S, D, C, Z
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
_dot ( n, x, incX, y, incY ) S, D, Ds
_dotu ( n, x, incX, y, incY ) C, Z
_dotc ( n, x, incX, y, incY ) C, Z
__dot ( n, alpha, x, incX, y, incY ) Sds
_nrm2 ( n, x, incX ) S, D, Sc, Dz
_asum ( n, x, incX ) S, D, Sc, Dz
I_amax( n, x, incX ) s, d, c, z
Level 2 BLAS
options dim b-width scalar matrix vector scalar vector prefixes
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
options dim scalar vector vector matrix prefixes
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
_spr ( uplo, n, alpha, x, incX, ap ) S, D
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
Level 3 BLAS
options dim scalar matrix matrix scalar matrix prefixes
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
Meaning of prefixes
S - float32 C - complex64
D - float64 Z - complex128
Matrix types
GE - GEneral GB - General Band
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
TR - TRiangular TB - Triangular Band TP - Triangular Packed
Options
trans = NoTrans, Trans, ConjTrans
uplo = Upper, Lower
diag = Nonunit, Unit
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
For real matrices, Trans and ConjTrans have the same meaning.
For Hermitian matrices, trans = Trans is not allowed.
For complex symmetric matrices, trans = ConjTrans is not allowed.
*/
package blas
// Flag constants indicate Givens transformation H matrix state.

View File

@ -1,16 +1,15 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blas64 provides a simple interface to the float64 BLAS API.
package blas64
import (
"github.com/gonum/blas"
"github.com/gonum/blas/native"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/gonum"
)
var blas64 blas.Float64 = native.Implementation{}
var blas64 blas.Float64 = gonum.Implementation{}
// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls.
// The default implementation is native.Implementation.

277
vendor/gonum.org/v1/gonum/blas/blas64/conv.go generated vendored Normal file
View File

@ -0,0 +1,277 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import "gonum.org/v1/gonum/blas"
// GeneralCols represents a matrix using the conventional column-major storage scheme.
type GeneralCols General
// From fills the receiver with elements from a. The receiver
// must have the same dimensions as a and have adequate backing
// data storage.
func (t GeneralCols) From(a General) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if len(t.Data) < (t.Cols-1)*t.Stride+t.Rows {
panic("blas64: short data slice")
}
for i := 0; i < a.Rows; i++ {
for j, v := range a.Data[i*a.Stride : i*a.Stride+a.Cols] {
t.Data[i+j*t.Stride] = v
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions as a and have adequate backing
// data storage.
func (t General) From(a GeneralCols) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if len(t.Data) < (t.Rows-1)*t.Stride+t.Cols {
panic("blas64: short data slice")
}
for j := 0; j < a.Cols; j++ {
for i, v := range a.Data[j*a.Stride : j*a.Stride+a.Rows] {
t.Data[i*t.Stride+j] = v
}
}
}
// TriangularCols represents a matrix using the conventional column-major storage scheme.
type TriangularCols Triangular
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, uplo and diag as a and have
// adequate backing data storage.
func (t TriangularCols) From(a Triangular) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
case blas.All:
for i := 0; i < a.N; i++ {
for j := 0; j < a.N; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, uplo and diag as a and have
// adequate backing data storage.
func (t Triangular) From(a TriangularCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
case blas.All:
for i := 0; i < a.N; i++ {
for j := 0; j < a.N; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
}
}
// BandCols represents a matrix using the band column-major storage scheme.
type BandCols Band
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and bandwidth as a and have
// adequate backing data storage.
func (t BandCols) From(a Band) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if t.KL != a.KL || t.KU != a.KU {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.KL+a.KU+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.KL+t.KU+1 {
panic("blas64: short stride for destination")
}
for i := 0; i < a.Rows; i++ {
for j := max(0, i-a.KL); j < min(i+a.KU+1, a.Cols); j++ {
t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and bandwidth as a and have
// adequate backing data storage.
func (t Band) From(a BandCols) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if t.KL != a.KL || t.KU != a.KU {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.KL+a.KU+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.KL+t.KU+1 {
panic("blas64: short stride for destination")
}
for j := 0; j < a.Cols; j++ {
for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
t.Data[j+a.KL-i+i*a.Stride] = a.Data[i+t.KU-j+j*t.Stride]
}
}
}
// TriangularBandCols represents a symmetric matrix using the band column-major storage scheme.
type TriangularBandCols TriangularBand
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t TriangularBandCols) From(a TriangularBand) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
dst := BandCols{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := Band{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t TriangularBand) From(a TriangularBandCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
dst := Band{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := BandCols{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
func max(a, b int) int {
if a > b {
return a
}
return b
}

153
vendor/gonum.org/v1/gonum/blas/blas64/conv_symmetric.go generated vendored Normal file
View File

@ -0,0 +1,153 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import "gonum.org/v1/gonum/blas"
// SymmetricCols represents a matrix using the conventional column-major storage scheme.
type SymmetricCols Symmetric
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and uplo as a and have adequate
// backing data storage.
func (t SymmetricCols) From(a Symmetric) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and uplo as a and have adequate
// backing data storage.
func (t Symmetric) From(a SymmetricCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
}
}
// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme.
type SymmetricBandCols SymmetricBand
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t SymmetricBandCols) From(a SymmetricBand) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
dst := BandCols{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := Band{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t SymmetricBand) From(a SymmetricBandCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
dst := Band{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := BandCols{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}

6
vendor/gonum.org/v1/gonum/blas/blas64/doc.go generated vendored Normal file
View File

@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blas64 provides a simple interface to the float64 BLAS API.
package blas64 // import "gonum.org/v1/gonum/blas/blas64"

108
vendor/gonum.org/v1/gonum/blas/doc.go generated vendored Normal file
View File

@ -0,0 +1,108 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package blas provides interfaces for the BLAS linear algebra standard.
All methods must perform appropriate parameter checking and panic if
provided parameters that do not conform to the requirements specified
by the BLAS standard.
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
This version is modified to remove the "order" option. All matrix operations are
on row-order matrices.
Level 1 BLAS
dim scalar vector vector scalars 5-element prefixes
struct
_rotg ( a, b ) S, D
_rotmg( d1, d2, a, b ) S, D
_rot ( n, x, incX, y, incY, c, s ) S, D
_rotm ( n, x, incX, y, incY, param ) S, D
_swap ( n, x, incX, y, incY ) S, D, C, Z
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
_copy ( n, x, incX, y, incY ) S, D, C, Z
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
_dot ( n, x, incX, y, incY ) S, D, Ds
_dotu ( n, x, incX, y, incY ) C, Z
_dotc ( n, x, incX, y, incY ) C, Z
__dot ( n, alpha, x, incX, y, incY ) Sds
_nrm2 ( n, x, incX ) S, D, Sc, Dz
_asum ( n, x, incX ) S, D, Sc, Dz
I_amax( n, x, incX ) s, d, c, z
Level 2 BLAS
options dim b-width scalar matrix vector scalar vector prefixes
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
options dim scalar vector vector matrix prefixes
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
_spr ( uplo, n, alpha, x, incX, ap ) S, D
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
Level 3 BLAS
options dim scalar matrix matrix scalar matrix prefixes
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
Meaning of prefixes
S - float32 C - complex64
D - float64 Z - complex128
Matrix types
GE - GEneral GB - General Band
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
TR - TRiangular TB - Triangular Band TP - Triangular Packed
Options
trans = NoTrans, Trans, ConjTrans
uplo = Upper, Lower
diag = Nonunit, Unit
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
For real matrices, Trans and ConjTrans have the same meaning.
For Hermitian matrices, trans = Trans is not allowed.
For complex symmetric matrices, trans = ConjTrans is not allowed.
*/
package blas // import "gonum.org/v1/gonum/blas"

185
vendor/gonum.org/v1/gonum/blas/gonum/cmplx.go generated vendored Normal file
View File

@ -0,0 +1,185 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import "gonum.org/v1/gonum/blas"
var (
_ blas.Complex64 = Implementation{}
_ blas.Complex128 = Implementation{}
)
// TODO(btracey): Replace this as complex routines are added, and instead
// automatically generate the complex64 routines from the complex128 ones.
var noComplex = "native: implementation does not implement this routine, see the cgo wrapper in gonum.org/v1/netlib/blas"
// Level 1 complex64 routines.
func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) {
panic(noComplex)
}
func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) {
panic(noComplex)
}
func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 {
panic(noComplex)
}
func (Implementation) Scasum(n int, x []complex64, incX int) float32 {
panic(noComplex)
}
func (Implementation) Icamax(n int, x []complex64, incX int) int {
panic(noComplex)
}
func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) {
panic(noComplex)
}
// Level 2 complex64 routines.
func (Implementation) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Cgbmv(tA blas.Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Chpmv(ul blas.Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64) {
panic(noComplex)
}
func (Implementation) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) {
panic(noComplex)
}
// Level 3 complex64 routines.
func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
panic(noComplex)
}
func (Implementation) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
panic(noComplex)
}
func (Implementation) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
panic(noComplex)
}
// Level 2 complex128 routines.
func (Implementation) Zgbmv(tA blas.Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
panic(noComplex)
}
func (Implementation) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
panic(noComplex)
}
func (Implementation) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
panic(noComplex)
}
func (Implementation) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
panic(noComplex)
}
func (Implementation) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
panic(noComplex)
}
func (Implementation) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
panic(noComplex)
}
// Level 3 complex128 routines.
func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
panic(noComplex)
}
func (Implementation) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
panic(noComplex)
}
func (Implementation) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) {
panic(noComplex)
}

View File

@ -1,15 +1,15 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"runtime"
"sync"
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f64"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
// Dgemm computes
@ -25,17 +25,17 @@ func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a
}
aTrans := tA == blas.Trans || tA == blas.ConjTrans
if aTrans {
checkMatrix64(k, m, a, lda)
checkDMatrix('a', k, m, a, lda)
} else {
checkMatrix64(m, k, a, lda)
checkDMatrix('a', m, k, a, lda)
}
bTrans := tB == blas.Trans || tB == blas.ConjTrans
if bTrans {
checkMatrix64(n, k, b, ldb)
checkDMatrix('b', n, k, b, ldb)
} else {
checkMatrix64(k, n, b, ldb)
checkDMatrix('b', k, n, b, ldb)
}
checkMatrix64(m, n, c, ldc)
checkDMatrix('c', m, n, c, ldc)
// scale c
if beta != 1 {
@ -121,7 +121,7 @@ func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []f
go func() {
defer wg.Done()
// Make local copies of otherwise global variables to reduce shared memory.
// This has a noticable effect on benchmarks in some cases.
// This has a noticeable effect on benchmarks in some cases.
alpha := alpha
aTrans := aTrans
bTrans := bTrans
@ -259,18 +259,3 @@ func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb i
func sliceView64(a []float64, lda, i, j, r, c int) []float64 {
return a[i*lda+j : (i+r-1)*lda+j+c]
}
func checkMatrix64(m, n int, a []float64, lda int) {
if m < 0 {
panic("blas: rows < 0")
}
if n < 0 {
panic("blas: cols < 0")
}
if lda < n {
panic("blas: illegal stride")
}
if len(a) < (m-1)*lda+n {
panic("blas: insufficient matrix slice length")
}
}

View File

@ -7,12 +7,12 @@
/*
Package native is a Go implementation of the BLAS API. This implementation
panics when the input arguments are invalid as per the standard, for example
if a vector increment is zero. Please note that the treatment of NaN values
if a vector increment is zero. Note that the treatment of NaN values
is not specified, and differs among the BLAS implementations.
github.com/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
gonum.org/v1/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
interface. The rest of this text describes the layout of the data for the input types.
Please note that in the function documentation, x[i] refers to the i^th element
Note that in the function documentation, x[i] refers to the i^th element
of the vector, which will be different from the i^th element of the slice if
incX != 1.
@ -85,4 +85,4 @@ which is given to the BLAS routine as [ 1 2 3 4 ...].
See http://www.crest.iu.edu/research/mtl/reference/html/banded.html
for more information
*/
package native
package gonum // import "gonum.org/v1/gonum/blas/gonum"

50
vendor/gonum.org/v1/gonum/blas/gonum/general_double.go generated vendored Normal file
View File

@ -0,0 +1,50 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
)
type general64 struct {
data []float64
rows, cols int
stride int
}
func (g general64) clone() general64 {
data := make([]float64, len(g.data))
copy(data, g.data)
return general64{
data: data,
rows: g.rows,
cols: g.cols,
stride: g.stride,
}
}
func (g general64) equal(a general64) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if a.data[i] != v {
return false
}
}
return true
}
func (g general64) equalWithinAbs(a general64, tol float64) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if math.Abs(a.data[i]-v) > tol {
return false
}
}
return true
}

52
vendor/gonum.org/v1/gonum/blas/gonum/general_single.go generated vendored Normal file
View File

@ -0,0 +1,52 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
math "gonum.org/v1/gonum/internal/math32"
)
type general32 struct {
data []float32
rows, cols int
stride int
}
func (g general32) clone() general32 {
data := make([]float32, len(g.data))
copy(data, g.data)
return general32{
data: data,
rows: g.rows,
cols: g.cols,
stride: g.stride,
}
}
func (g general32) equal(a general32) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if a.data[i] != v {
return false
}
}
return true
}
func (g general32) equalWithinAbs(a general32, tol float32) bool {
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
return false
}
for i, v := range g.data {
if math.Abs(a.data[i]-v) > tol {
return false
}
}
return true
}

View File

@ -1,10 +1,12 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate ./single_precision.bash
package native
package gonum
import "math"
type Implementation struct{}
@ -43,9 +45,6 @@ const (
buffMul = 4 // how big is the buffer relative to the number of workers
)
// [SD]gemm debugging constant.
const debug = false
// subMul is a common type shared by [SD]gemm.
type subMul struct {
i, j int // index of block
@ -65,8 +64,70 @@ func min(a, b int) int {
return a
}
// blocks returns the number of divisons of the dimension length with the given
func checkSMatrix(name byte, m, n int, a []float32, lda int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < n {
panic("blas: illegal stride of " + string(name))
}
if len(a) < (m-1)*lda+n {
panic("blas: index of " + string(name) + " out of range")
}
}
func checkDMatrix(name byte, m, n int, a []float64, lda int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < n {
panic("blas: illegal stride of " + string(name))
}
if len(a) < (m-1)*lda+n {
panic("blas: index of " + string(name) + " out of range")
}
}
func checkZMatrix(name byte, m, n int, a []complex128, lda int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic("blas: illegal stride of " + string(name))
}
if len(a) < (m-1)*lda+n {
panic("blas: insufficient " + string(name) + " matrix slice length")
}
}
func checkZVector(name byte, n int, x []complex128, incX int) {
if n < 0 {
panic(nLT0)
}
if incX == 0 {
panic(zeroIncX)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic("blas: insufficient " + string(name) + " vector slice length")
}
}
// blocks returns the number of divisions of the dimension length with the given
// block size.
func blocks(dim, bsize int) int {
return (dim + bsize - 1) / bsize
}
// dcabs1 returns |real(z)|+|imag(z)|.
func dcabs1(z complex128) float64 {
return math.Abs(real(z)) + math.Abs(imag(z))
}

442
vendor/gonum.org/v1/gonum/blas/gonum/level1cmplx128.go generated vendored Normal file
View File

@ -0,0 +1,442 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/internal/asm/c128"
)
// Dzasum returns the sum of the absolute values of the elements of x
// \sum_i |Re(x[i])| + |Im(x[i])|
// Dzasum returns 0 if incX is negative.
func (Implementation) Dzasum(n int, x []complex128, incX int) float64 {
if n < 0 {
panic(negativeN)
}
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
var sum float64
if incX == 1 {
if len(x) < n {
panic(badX)
}
for _, v := range x[:n] {
sum += dcabs1(v)
}
return sum
}
if (n-1)*incX >= len(x) {
panic(badX)
}
for i := 0; i < n; i++ {
v := x[i*incX]
sum += dcabs1(v)
}
return sum
}
// Dznrm2 computes the Euclidean norm of the complex vector x,
// ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])).
// This function returns 0 if incX is negative.
func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if n < 1 {
if n == 0 {
return 0
}
panic(negativeN)
}
if (n-1)*incX >= len(x) {
panic(badX)
}
var (
scale float64
ssq float64 = 1
)
if incX == 1 {
for _, v := range x[:n] {
re, im := math.Abs(real(v)), math.Abs(imag(v))
if re != 0 {
if re > scale {
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
for ix := 0; ix < n*incX; ix += incX {
re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix]))
if re != 0 {
if re > scale {
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
// Izamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|.
// Izamax returns -1 if n is 0 or incX is negative.
func (Implementation) Izamax(n int, x []complex128, incX int) int {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
// Return invalid index.
return -1
}
if n < 1 {
if n == 0 {
// Return invalid index.
return -1
}
panic(negativeN)
}
if len(x) <= (n-1)*incX {
panic(badX)
}
idx := 0
max := dcabs1(x[0])
if incX == 1 {
for i, v := range x[1:n] {
absV := dcabs1(v)
if absV > max {
max = absV
idx = i + 1
}
}
return idx
}
ix := incX
for i := 1; i < n; i++ {
absV := dcabs1(x[ix])
if absV > max {
max = absV
idx = i
}
ix += incX
}
return idx
}
// Zaxpy adds alpha times x to y:
// y[i] += alpha * x[i] for all i
func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(negativeN)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if alpha == 0 {
return
}
if incX == 1 && incY == 1 {
c128.AxpyUnitary(alpha, x[:n], y[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (1 - n) * incX
}
if incY < 0 {
iy = (1 - n) * incY
}
c128.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Zcopy copies the vector x to vector y.
func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(negativeN)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
copy(y[:n], x[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
y[iy] = x[ix]
ix += incX
iy += incY
}
}
// Zdotc computes the dot product
// x^H · y
// of two complex vectors x and y.
func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(negativeN)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return c128.DotcUnitary(x[:n], y[:n])
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || (n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || (n-1)*incY >= len(y) {
panic(badY)
}
return c128.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Zdotu computes the dot product
// x^T · y
// of two complex vectors x and y.
func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(negativeN)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return c128.DotuUnitary(x[:n], y[:n])
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || (n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || (n-1)*incY >= len(y) {
panic(badY)
}
return c128.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Zdscal scales the vector x by a real scalar alpha.
// Zdscal has no effect if incX < 0.
func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return
}
if (n-1)*incX >= len(x) {
panic(badX)
}
if n < 1 {
if n == 0 {
return
}
panic(negativeN)
}
if alpha == 0 {
if incX == 1 {
x = x[:n]
for i := range x {
x[i] = 0
}
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] = 0
}
return
}
if incX == 1 {
x = x[:n]
for i, v := range x {
x[i] = complex(alpha*real(v), alpha*imag(v))
}
return
}
for ix := 0; ix < n*incX; ix += incX {
v := x[ix]
x[ix] = complex(alpha*real(v), alpha*imag(v))
}
}
// Zscal scales the vector x by a complex scalar alpha.
// Zscal has no effect if incX < 0.
func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return
}
if (n-1)*incX >= len(x) {
panic(badX)
}
if n < 1 {
if n == 0 {
return
}
panic(negativeN)
}
if alpha == 0 {
if incX == 1 {
x = x[:n]
for i := range x {
x[i] = 0
}
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] = 0
}
return
}
if incX == 1 {
c128.ScalUnitary(alpha, x[:n])
return
}
c128.ScalInc(alpha, x, uintptr(n), uintptr(incX))
}
// Zswap exchanges the elements of two complex vectors x and y.
func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(negativeN)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
x = x[:n]
for i, v := range x {
x[i], y[i] = y[i], v
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
x[ix], y[iy] = y[iy], x[ix]
ix += incX
iy += incY
}
}

View File

@ -1,14 +1,14 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"math"
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f64"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
var _ blas.Float64Level1 = Implementation{}
@ -270,13 +270,7 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
return
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badLenX)
}
if len(y) < n {
panic(badLenY)
}
f64.AxpyUnitaryTo(y, alpha, x[:n], y)
f64.AxpyUnitary(alpha, x[:n], y[:n])
return
}
var ix, iy int
@ -286,12 +280,6 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
panic(badLenX)
}
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
panic(badLenY)
}
f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
@ -565,7 +553,6 @@ func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int,
ix += incX
iy += incY
}
return
}
// Dscal scales x by alpha.
@ -604,7 +591,5 @@ func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) {
f64.ScalUnitary(alpha, x[:n])
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] *= alpha
}
f64.ScalInc(alpha, x, uintptr(n), uintptr(incX))
}

View File

@ -1,11 +1,11 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/internal/asm/f64"
"gonum.org/v1/gonum/internal/asm/f64"
)
// Ddot computes the dot product of the two vectors

View File

@ -1,16 +1,16 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
math "github.com/gonum/blas/native/internal/math32"
math "gonum.org/v1/gonum/internal/math32"
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f32"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level1 = Implementation{}
@ -284,13 +284,7 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
return
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badLenX)
}
if len(y) < n {
panic(badLenY)
}
f32.AxpyUnitaryTo(y, alpha, x[:n], y)
f32.AxpyUnitary(alpha, x[:n], y[:n])
return
}
var ix, iy int
@ -300,12 +294,6 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
panic(badLenX)
}
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
panic(badLenY)
}
f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
@ -587,7 +575,6 @@ func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int,
ix += incX
iy += incY
}
return
}
// Sscal scales x by alpha.
@ -628,7 +615,5 @@ func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) {
f32.ScalUnitary(alpha, x[:n])
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] *= alpha
}
f32.ScalInc(alpha, x, uintptr(n), uintptr(incX))
}

View File

@ -1,13 +1,13 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/internal/asm/f32"
"gonum.org/v1/gonum/internal/asm/f32"
)
// Dsdot computes the dot product of the two vectors

View File

@ -1,13 +1,13 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/internal/asm/f32"
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sdot computes the dot product of the two vectors

View File

@ -1,13 +1,13 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/internal/asm/f32"
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sdsdot computes the dot product of the two vectors plus a constant

1321
vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,20 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f64"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
var _ blas.Float64Level2 = Implementation{}
// Dgemv computes
// y = alpha * a * x + beta * y if tA = blas.NoTrans
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
// y = alpha * A * x + beta * y if tA = blas.NoTrans
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
@ -167,21 +167,14 @@ func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []f
x = x[:m]
y = y[:n]
for i, xv := range x {
tmp := alpha * xv
if tmp != 0 {
atmp := a[i*lda : i*lda+n]
f64.AxpyUnitaryTo(atmp, tmp, y, atmp)
}
f64.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
}
return
}
ix := kx
for i := 0; i < m; i++ {
tmp := alpha * x[ix]
if tmp != 0 {
f64.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
}
f64.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
ix += incX
}
}
@ -230,7 +223,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(badY)
}
if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
panic(badLdA)
}
@ -269,7 +262,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
if tA == blas.NoTrans {
iy := ky
if incX == 1 {
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -284,7 +277,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
}
return
}
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -301,7 +294,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
return
}
if incX == 1 {
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -316,7 +309,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
return
}
ix := kx
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -1530,7 +1523,6 @@ func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64,
ix += incX
iy += incY
}
return
}
// Dsyr performs the rank-one update
@ -1718,7 +1710,6 @@ func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, inc
ix += incX
iy += incY
}
return
}
// Dtpsv solves

View File

@ -1,22 +1,22 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2014 The gonum Authors. All rights reserved.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f32"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level2 = Implementation{}
// Sgemv computes
// y = alpha * a * x + beta * y if tA = blas.NoTrans
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
// y = alpha * A * x + beta * y if tA = blas.NoTrans
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
@ -173,21 +173,14 @@ func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []f
x = x[:m]
y = y[:n]
for i, xv := range x {
tmp := alpha * xv
if tmp != 0 {
atmp := a[i*lda : i*lda+n]
f32.AxpyUnitaryTo(atmp, tmp, y, atmp)
}
f32.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
}
return
}
ix := kx
for i := 0; i < m; i++ {
tmp := alpha * x[ix]
if tmp != 0 {
f32.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
}
f32.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
ix += incX
}
}
@ -238,7 +231,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(badY)
}
if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
panic(badLdA)
}
@ -277,7 +270,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
if tA == blas.NoTrans {
iy := ky
if incX == 1 {
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -292,7 +285,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
}
return
}
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -309,7 +302,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
return
}
if incX == 1 {
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -324,7 +317,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
return
}
ix := kx
for i := 0; i < m; i++ {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, ld+kL-i)
off := max(0, i-kL)
@ -1552,7 +1545,6 @@ func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32,
ix += incX
iy += incY
}
return
}
// Ssyr performs the rank-one update
@ -1744,7 +1736,6 @@ func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, inc
ix += incX
iy += incY
}
return
}
// Stpsv solves

View File

@ -1,12 +1,12 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f64"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
var _ blas.Float64Level3 = Implementation{}

View File

@ -1,14 +1,14 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2014 The gonum Authors. All rights reserved.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f32"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level3 = Implementation{}

View File

@ -1,17 +1,17 @@
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2014 The gonum Authors. All rights reserved.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
package gonum
import (
"runtime"
"sync"
"github.com/gonum/blas"
"github.com/gonum/internal/asm/f32"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sgemm computes
@ -29,17 +29,17 @@ func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a
}
aTrans := tA == blas.Trans || tA == blas.ConjTrans
if aTrans {
checkMatrix32(k, m, a, lda)
checkSMatrix('a', k, m, a, lda)
} else {
checkMatrix32(m, k, a, lda)
checkSMatrix('a', m, k, a, lda)
}
bTrans := tB == blas.Trans || tB == blas.ConjTrans
if bTrans {
checkMatrix32(n, k, b, ldb)
checkSMatrix('b', n, k, b, ldb)
} else {
checkMatrix32(k, n, b, ldb)
checkSMatrix('b', k, n, b, ldb)
}
checkMatrix32(m, n, c, ldc)
checkSMatrix('c', m, n, c, ldc)
// scale c
if beta != 1 {
@ -125,7 +125,7 @@ func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []f
go func() {
defer wg.Done()
// Make local copies of otherwise global variables to reduce shared memory.
// This has a noticable effect on benchmarks in some cases.
// This has a noticeable effect on benchmarks in some cases.
alpha := alpha
aTrans := aTrans
bTrans := bTrans
@ -263,18 +263,3 @@ func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb i
func sliceView32(a []float32, lda, i, j, r, c int) []float32 {
return a[i*lda+j : (i+r-1)*lda+j+c]
}
func checkMatrix32(m, n int, a []float32, lda int) {
if m < 0 {
panic("blas: rows < 0")
}
if n < 0 {
panic("blas: cols < 0")
}
if lda < n {
panic("blas: illegal stride")
}
if len(a) < (m-1)*lda+n {
panic("blas: insufficient matrix slice length")
}
}

11
vendor/gonum.org/v1/gonum/floats/doc.go generated vendored Normal file
View File

@ -0,0 +1,11 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package floats provides a set of helper routines for dealing with slices
// of float64. The functions avoid allocations to allow for use within tight
// loops without garbage collection overhead.
//
// The convention used is that when a slice is being modified in place, it has
// the name dst.
package floats // import "gonum.org/v1/gonum/floats"

View File

@ -2,20 +2,15 @@
// Use of this code is governed by a BSD-style
// license that can be found in the LICENSE file
// Package floats provides a set of helper routines for dealing with slices
// of float64. The functions avoid allocations to allow for use within tight
// loops without garbage collection overhead.
//
// The convention used is that when a slice is being modified in place, it has
// the name dst.
package floats
import (
"errors"
"math"
"sort"
"strconv"
"github.com/gonum/internal/asm/f64"
"gonum.org/v1/gonum/internal/asm/f64"
)
// Add adds, element-wise, the elements of s and dst, and stores in dst.
@ -89,8 +84,8 @@ func (a argsort) Swap(i, j int) {
a.inds[i], a.inds[j] = a.inds[j], a.inds[i]
}
// Argsort sorts the elements of s while tracking their original order.
// At the conclusion of Argsort, s will contain the original elements of s
// Argsort sorts the elements of dst while tracking their original order.
// At the conclusion of Argsort, dst will contain the original elements of dst
// but sorted in increasing order, and inds will contain the original position
// of the elements in the slice such that dst[i] = origDst[inds[i]].
// It panics if the lengths of dst and inds do not match.
@ -342,7 +337,6 @@ func EqualLengths(slices ...[]float64) bool {
// all of the found elements will be returned along with an error.
// At the return of the function, the input inds will be in an undetermined state.
func Find(inds []int, f func(float64) bool, s []float64, k int) ([]int, error) {
// inds is also returned to allow for calling with nil
// Reslice inds to have zero length
@ -495,6 +489,29 @@ func MulTo(dst, s, t []float64) []float64 {
return dst
}
const (
nanBits = 0x7ff8000000000000
nanMask = 0xfff8000000000000
)
// NaNWith returns an IEEE 754 "quiet not-a-number" value with the
// payload specified in the low 51 bits of payload.
// The NaN returned by math.NaN has a bit pattern equal to NaNWith(1).
func NaNWith(payload uint64) float64 {
return math.Float64frombits(nanBits | (payload &^ nanMask))
}
// NaNPayload returns the lowest 51 bits payload of an IEEE 754 "quiet
// not-a-number". For values of f other than quiet-NaN, NaNPayload
// returns zero and false.
func NaNPayload(f float64) (payload uint64, ok bool) {
b := math.Float64bits(f)
if b&nanBits != nanBits {
return 0, false
}
return b &^ nanMask, true
}
// Nearest returns the index of the element in s
// whose value is nearest to v. If several such
// elements exist, the lowest index is returned.
@ -567,6 +584,19 @@ func Norm(s []float64, L float64) float64 {
return math.Pow(norm, 1/L)
}
// ParseWithNA converts the string s to a float64 in v.
// If s equals missing, w is returned as 0, otherwise 1.
func ParseWithNA(s, missing string) (v, w float64, err error) {
if s == missing {
return 0, 0, nil
}
v, err = strconv.ParseFloat(s, 64)
if err == nil {
w = 1
}
return v, w, err
}
// Prod returns the product of the elements of the slice.
// Returns 1 if len(s) = 0.
func Prod(s []float64) float64 {

View File

@ -0,0 +1,134 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
TEXT ·AxpyInc(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SI // SI = &x
MOVQ y_base+40(FP), DI // DI = &y
MOVQ n+64(FP), CX // CX = n
CMPQ CX, $0 // if n==0 { return }
JE axpyi_end
MOVQ ix+88(FP), R8 // R8 = ix // Load the first index
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ iy+96(FP), R9 // R9 = iy
SHLQ $4, R9 // R9 *= sizeof(complex128)
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
LEAQ (DI)(R9*1), DI // DI = &(y[iy])
MOVQ DI, DX // DX = DI // Separate Read/Write pointers
MOVQ incX+72(FP), R8 // R8 = incX
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ incY+80(FP), R9 // R9 = iy
SHLQ $4, R9 // R9 *= sizeof(complex128)
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
axpyi_loop: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS (SI)(R8*1), X4
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
MOVUPS (SI), X6
MOVUPS (SI)(R8*1), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX), X3
ADDPD (DX)(R9*1), X5
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
ADDPD (DX), X7
ADDPD (DX)(R9*1), X9
MOVUPS X3, (DI) // dst[i] = X_(i+1)
MOVUPS X5, (DI)(R9*1)
LEAQ (DI)(R9*2), DI
MOVUPS X7, (DI)
MOVUPS X9, (DI)(R9*1)
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
DECQ BX
JNZ axpyi_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE axpyi_end
axpyi_tail: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DI), X3
MOVUPS X3, (DI) // y[i] = X_i
ADDQ R8, SI // SI = &(SI[incX])
ADDQ R9, DI // DI = &(DI[incY])
LOOP axpyi_tail // } while --CX > 0
axpyi_end:
RET

View File

@ -0,0 +1,141 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
TEXT ·AxpyIncTo(SB), NOSPLIT, $0
MOVQ dst_base+0(FP), DI // DI = &dst
MOVQ x_base+56(FP), SI // SI = &x
MOVQ y_base+80(FP), DX // DX = &y
MOVQ n+104(FP), CX // CX = n
CMPQ CX, $0 // if n==0 { return }
JE axpyi_end
MOVQ ix+128(FP), R8 // R8 = ix // Load the first index
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ iy+136(FP), R9 // R9 = iy
SHLQ $4, R9 // R9 *= sizeof(complex128)
MOVQ idst+32(FP), R10 // R10 = idst
SHLQ $4, R10 // R10 *= sizeof(complex128)
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
LEAQ (DX)(R9*1), DX // DX = &(y[iy])
LEAQ (DI)(R10*1), DI // DI = &(dst[idst])
MOVQ incX+112(FP), R8 // R8 = incX
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ incY+120(FP), R9 // R9 = incY
SHLQ $4, R9 // R9 *= sizeof(complex128)
MOVQ incDst+24(FP), R10 // R10 = incDst
SHLQ $4, R10 // R10 *= sizeof(complex128)
MOVUPS alpha+40(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
axpyi_loop: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS (SI)(R8*1), X4
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
MOVUPS (SI), X6
MOVUPS (SI)(R8*1), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX), X3
ADDPD (DX)(R9*1), X5
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
ADDPD (DX), X7
ADDPD (DX)(R9*1), X9
MOVUPS X3, (DI) // dst[i] = X_(i+1)
MOVUPS X5, (DI)(R10*1)
LEAQ (DI)(R10*2), DI
MOVUPS X7, (DI)
MOVUPS X9, (DI)(R10*1)
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
DECQ BX
JNZ axpyi_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE axpyi_end
axpyi_tail: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX), X3
MOVUPS X3, (DI) // y[i] X_(i+1)
ADDQ R8, SI // SI += incX
ADDQ R9, DX // DX += incY
ADDQ R10, DI // DI += incDst
LOOP axpyi_tail // } while --CX > 0
axpyi_end:
RET

View File

@ -0,0 +1,122 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyUnitary(alpha complex128, x, y []complex128)
TEXT ·AxpyUnitary(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SI // SI = &x
MOVQ y_base+40(FP), DI // DI = &y
MOVQ x_len+24(FP), CX // CX = min( len(x), len(y) )
CMPQ y_len+48(FP), CX
CMOVQLE y_len+48(FP), CX
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
PXOR X0, X0 // Clear work registers and cache-align loop
PXOR X1, X1
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
XORQ AX, AX // i = 0
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ caxy_tail // if BX == 0 { goto caxy_tail }
caxy_loop: // do {
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS 16(SI)(AX*8), X4
MOVUPS 32(SI)(AX*8), X6
MOVUPS 48(SI)(AX*8), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DI)(AX*8), X3
ADDPD 16(DI)(AX*8), X5
ADDPD 32(DI)(AX*8), X7
ADDPD 48(DI)(AX*8), X9
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
MOVUPS X5, 16(DI)(AX*8)
MOVUPS X7, 32(DI)(AX*8)
MOVUPS X9, 48(DI)(AX*8)
ADDQ $8, AX // i += 8
DECQ BX
JNZ caxy_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
caxy_tail: // do {
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DI)(AX*8), X3
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
ADDQ $2, AX // i += 2
LOOP caxy_tail // } while --CX > 0
caxy_end:
RET

View File

@ -0,0 +1,123 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyUnitaryTo(dst []complex128, alpha complex64, x, y []complex128)
TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0
MOVQ dst_base+0(FP), DI // DI = &dst
MOVQ x_base+40(FP), SI // SI = &x
MOVQ y_base+64(FP), DX // DX = &y
MOVQ x_len+48(FP), CX // CX = min( len(x), len(y), len(dst) )
CMPQ y_len+72(FP), CX
CMOVQLE y_len+72(FP), CX
CMPQ dst_len+8(FP), CX
CMOVQLE dst_len+8(FP), CX
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
MOVUPS alpha+24(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
XORQ AX, AX // i = 0
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ caxy_tail // if BX == 0 { goto caxy_tail }
caxy_loop: // do {
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS 16(SI)(AX*8), X4
MOVUPS 32(SI)(AX*8), X6
MOVUPS 48(SI)(AX*8), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3 // Load and duplicate imag elements (xi, xi)
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2 // duplicate real elements (xr, xr)
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX)(AX*8), X3
ADDPD 16(DX)(AX*8), X5
ADDPD 32(DX)(AX*8), X7
ADDPD 48(DX)(AX*8), X9
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
MOVUPS X5, 16(DI)(AX*8)
MOVUPS X7, 32(DI)(AX*8)
MOVUPS X9, 48(DI)(AX*8)
ADDQ $8, AX // i += 8
DECQ BX
JNZ caxy_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
caxy_tail: // Same calculation, but read in values to avoid trampling memory
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX)(AX*8), X3
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
ADDQ $2, AX // i += 2
LOOP caxy_tail // } while --CX > 0
caxy_end:
RET

6
vendor/gonum.org/v1/gonum/internal/asm/c128/doc.go generated vendored Normal file
View File

@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package c128 provides complex128 vector primitives.
package c128 // import "gonum.org/v1/gonum/internal/asm/c128"

View File

@ -0,0 +1,153 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define INC_X R8
#define INCx3_X R9
#define INC_Y R10
#define INCx3_Y R11
#define NEG1 X15
#define P_NEG1 X14
// func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
TEXT ·DotcInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ n+48(FP), LEN // LEN = n
PXOR SUM, SUM // SUM = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
PXOR P_SUM, P_SUM // P_SUM = 0
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
SHLQ $4, INC_X
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
SHLQ $4, INC_Y
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
MOVQ incX+56(FP), INC_X // INC_X = incX
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
MOVQ incY+64(FP), INC_Y // INC_Y = incY
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
MOVSD $(-1.0), NEG1
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = n % 4
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ dot_tail // if n <= 4 { goto dot_tail }
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
dot_loop: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_XPTR_INCX__X5
MOVDDUP_XPTR_INCX_2__X7
MOVDDUP_XPTR_INCx3X__X9
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_8_XPTR_INCX__X4
MOVDDUP_8_XPTR_INCX_2__X6
MOVDDUP_8_XPTR_INCx3X__X8
// X_i = { -imag(x[i]), -imag(x[i]) }
MULPD NEG1, X2
MULPD P_NEG1, X4
MULPD NEG1, X6
MULPD P_NEG1, X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR), X10
MOVUPS (Y_PTR)(INC_Y*1), X11
MOVUPS (Y_PTR)(INC_Y*2), X12
MOVUPS (Y_PTR)(INCx3_Y*1), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // sum += psum
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // sum += result[i]
ADDQ INC_X, X_PTR // X_PTR += incX
ADDQ INC_Y, Y_PTR // Y_PTR += incY
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVUPS SUM, sum+88(FP)
RET

View File

@ -0,0 +1,143 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define IDX AX
#define I_IDX DX
#define NEG1 X15
#define P_NEG1 X14
// func DotcUnitary(x, y []complex128) (sum complex128)
TEXT ·DotcUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
PXOR SUM, SUM // sum = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
XORPS P_SUM, P_SUM // psum = 0
MOVSD $(-1.0), NEG1
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
XORQ IDX, IDX // i := 0
MOVQ $1, I_IDX // j := 1
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = floor( TAIL / 4 )
SHRQ $2, LEN // LEN = TAIL % 4
JZ dot_tail // if LEN == 0 { goto dot_tail }
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
dot_loop: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_16_XPTR_IDX_8__X5
MOVDDUP_32_XPTR_IDX_8__X7
MOVDDUP_48_XPTR_IDX_8__X9
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_16_XPTR_IIDX_8__X4
MOVDDUP_32_XPTR_IIDX_8__X6
MOVDDUP_48_XPTR_IIDX_8__X8
// X_i = { -imag(x[i]), -imag(x[i]) }
MULPD NEG1, X2
MULPD P_NEG1, X4
MULPD NEG1, X6
MULPD P_NEG1, X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR)(IDX*8), X10
MOVUPS 16(Y_PTR)(IDX*8), X11
MOVUPS 32(Y_PTR)(IDX*8), X12
MOVUPS 48(Y_PTR)(IDX*8), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
ADDQ $8, IDX // IDX += 8
ADDQ $8, I_IDX // I_IDX += 8
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // sum += psum
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]) , real(x[i]) }
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // SUM += result[i]
ADDQ $2, IDX // IDX += 2
ADDQ $2, I_IDX // I_IDX += 2
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVUPS SUM, sum+48(FP)
RET

View File

@ -0,0 +1,141 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define INC_X R8
#define INCx3_X R9
#define INC_Y R10
#define INCx3_Y R11
// func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
TEXT ·DotuInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ n+48(FP), LEN // LEN = n
PXOR SUM, SUM // sum = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
SHLQ $4, INC_X
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
SHLQ $4, INC_Y
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
MOVQ incX+56(FP), INC_X // INC_X = incX
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
MOVQ incY+64(FP), INC_Y // INC_Y = incY
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
MOVQ LEN, TAIL
ANDQ $3, TAIL // LEN = LEN % 4
SHRQ $2, LEN // LEN = floor( LEN / 4 )
JZ dot_tail // if LEN <= 4 { goto dot_tail }
PXOR P_SUM, P_SUM // psum = 0
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
dot_loop: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_XPTR_INCX__X5
MOVDDUP_XPTR_INCX_2__X7
MOVDDUP_XPTR_INCx3X__X9
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_8_XPTR_INCX__X4
MOVDDUP_8_XPTR_INCX_2__X6
MOVDDUP_8_XPTR_INCx3X__X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR), X10
MOVUPS (Y_PTR)(INC_Y*1), X11
MOVUPS (Y_PTR)(INC_Y*2), X12
MOVUPS (Y_PTR)(INCx3_Y*1), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
DECQ LEN
JNZ dot_loop // } while --BX > 0
ADDPD P_SUM, SUM // sum += psum
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // sum += result[i]
ADDQ INC_X, X_PTR // X_PTR += incX
ADDQ INC_Y, Y_PTR // Y_PTR += incY
DECQ TAIL // --TAIL
JNZ dot_tail // } while TAIL > 0
dot_end:
MOVUPS SUM, sum+88(FP)
RET

View File

@ -0,0 +1,130 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define IDX AX
#define I_IDX DX
// func DotuUnitary(x, y []complex128) (sum complex128)
TEXT ·DotuUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
PXOR SUM, SUM // SUM = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
PXOR P_SUM, P_SUM // P_SUM = 0
XORQ IDX, IDX // IDX = 0
MOVQ $1, DX // j = 1
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = floor( LEN / 4 )
SHRQ $2, LEN // LEN = LEN % 4
JZ dot_tail // if LEN == 0 { goto dot_tail }
dot_loop: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_16_XPTR_IDX_8__X5
MOVDDUP_32_XPTR_IDX_8__X7
MOVDDUP_48_XPTR_IDX_8__X9
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_16_XPTR_IIDX_8__X4
MOVDDUP_32_XPTR_IIDX_8__X6
MOVDDUP_48_XPTR_IIDX_8__X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR)(IDX*8), X10
MOVUPS 16(Y_PTR)(IDX*8), X11
MOVUPS 32(Y_PTR)(IDX*8), X12
MOVUPS 48(Y_PTR)(IDX*8), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
ADDQ $8, IDX // IDX += 8
ADDQ $8, I_IDX // I_IDX += 8
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i] , real(x[i]) }
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // psum += result[i]
ADDQ $2, IDX // IDX += 2
ADDQ $2, I_IDX // I_IDX += 2
DECQ TAIL // --TAIL
JNZ dot_tail // } while TAIL > 0
dot_end:
MOVUPS SUM, sum+48(FP)
RET

View File

@ -0,0 +1,69 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define TAIL BX
#define INC R9
#define INC3 R10
#define ALPHA X0
#define ALPHA_2 X1
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
TEXT ·DscalInc(SB), NOSPLIT, $0
MOVQ x_base+8(FP), SRC // SRC = &x
MOVQ n+32(FP), LEN // LEN = n
CMPQ LEN, $0 // if LEN == 0 { return }
JE dscal_end
MOVDDUP_ALPHA // ALPHA = alpha
MOVQ inc+40(FP), INC // INC = inc
SHLQ $4, INC // INC = INC * sizeof(complex128)
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA and ALPHA_2 for pipelining
MOVQ LEN, TAIL // TAIL = LEN
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
dscal_loop: // do {
MOVUPS (SRC), X2 // X_i = x[i]
MOVUPS (SRC)(INC*1), X3
MOVUPS (SRC)(INC*2), X4
MOVUPS (SRC)(INC3*1), X5
MULPD ALPHA, X2 // X_i *= ALPHA
MULPD ALPHA_2, X3
MULPD ALPHA, X4
MULPD ALPHA_2, X5
MOVUPS X2, (DST) // x[i] = X_i
MOVUPS X3, (DST)(INC*1)
MOVUPS X4, (DST)(INC*2)
MOVUPS X5, (DST)(INC3*1)
LEAQ (SRC)(INC*4), SRC // SRC += INC*4
DECQ LEN
JNZ dscal_loop // } while --LEN > 0
dscal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JE dscal_end // if TAIL == 0 { return }
dscal_tail_loop: // do {
MOVUPS (SRC), X2 // X_i = x[i]
MULPD ALPHA, X2 // X_i *= ALPHA
MOVUPS X2, (DST) // x[i] = X_i
ADDQ INC, SRC // SRC += INC
DECQ TAIL
JNZ dscal_tail_loop // } while --TAIL > 0
dscal_end:
RET

View File

@ -0,0 +1,66 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define IDX AX
#define TAIL BX
#define ALPHA X0
#define ALPHA_2 X1
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
// func DscalUnitary(alpha float64, x []complex128)
TEXT ·DscalUnitary(SB), NOSPLIT, $0
MOVQ x_base+8(FP), SRC // SRC = &x
MOVQ x_len+16(FP), LEN // LEN = len(x)
CMPQ LEN, $0 // if LEN == 0 { return }
JE dscal_end
MOVDDUP_ALPHA // ALPHA = alpha
XORQ IDX, IDX // IDX = 0
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
MOVQ LEN, TAIL // TAIL = LEN
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
dscal_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
MOVUPS 16(SRC)(IDX*8), X3
MOVUPS 32(SRC)(IDX*8), X4
MOVUPS 48(SRC)(IDX*8), X5
MULPD ALPHA, X2 // X_i *= ALPHA
MULPD ALPHA_2, X3
MULPD ALPHA, X4
MULPD ALPHA_2, X5
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
MOVUPS X3, 16(DST)(IDX*8)
MOVUPS X4, 32(DST)(IDX*8)
MOVUPS X5, 48(DST)(IDX*8)
ADDQ $8, IDX // IDX += 8
DECQ LEN
JNZ dscal_loop // } while --LEN > 0
dscal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JZ dscal_end // if TAIL == 0 { return }
dscal_tail_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
MULPD ALPHA, X2 // X_i *= ALPHA
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
ADDQ $2, IDX // IDX += 2
DECQ TAIL
JNZ dscal_tail_loop // } while --TAIL > 0
dscal_end:
RET

31
vendor/gonum.org/v1/gonum/internal/asm/c128/scal.go generated vendored Normal file
View File

@ -0,0 +1,31 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package c128
// ScalUnitaryTo is
// for i, v := range x {
// dst[i] = alpha * v
// }
func ScalUnitaryTo(dst []complex128, alpha complex128, x []complex128) {
for i, v := range x {
dst[i] = alpha * v
}
}
// ScalIncTo is
// var idst, ix uintptr
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha * x[ix]
// ix += incX
// idst += incDst
// }
func ScalIncTo(dst []complex128, incDst uintptr, alpha complex128, x []complex128, n, incX uintptr) {
var idst, ix uintptr
for i := 0; i < int(n); i++ {
dst[idst] = alpha * x[ix]
ix += incX
idst += incDst
}
}

View File

@ -0,0 +1,116 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define IDX AX
#define TAIL BX
#define ALPHA X0
#define ALPHA_C X1
#define ALPHA2 X10
#define ALPHA_C2 X11
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
// func ScalUnitary(alpha complex128, x []complex128)
TEXT ·ScalUnitary(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SRC // SRC = &x
MOVQ x_len+24(FP), LEN // LEN = len(x)
CMPQ LEN, $0 // if LEN == 0 { return }
JE scal_end
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
MOVAPS ALPHA, ALPHA_C
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
XORQ IDX, IDX // IDX = 0
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
MOVAPS ALPHA_C, ALPHA_C2
MOVQ LEN, TAIL
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ scal_tail // if BX == 0 { goto scal_tail }
scal_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS 16(SRC)(IDX*8), X4
MOVUPS 32(SRC)(IDX*8), X6
MOVUPS 48(SRC)(IDX*8), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
MULPD ALPHA_C, X2
MULPD ALPHA, X3
MULPD ALPHA_C2, X4
MULPD ALPHA2, X5
MULPD ALPHA_C, X6
MULPD ALPHA, X7
MULPD ALPHA_C2, X8
MULPD ALPHA2, X9
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
MOVUPS X5, 16(DST)(IDX*8)
MOVUPS X7, 32(DST)(IDX*8)
MOVUPS X9, 48(DST)(IDX*8)
ADDQ $8, IDX // IDX += 8
DECQ LEN
JNZ scal_loop // } while --LEN > 0
scal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JZ scal_end // if TAIL == 0 { return }
scal_tail_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
ADDQ $2, IDX // IDX += 2
DECQ TAIL
JNZ scal_tail_loop // } while --LEN > 0
scal_end:
RET

View File

@ -0,0 +1,121 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define TAIL BX
#define INC R9
#define INC3 R10
#define ALPHA X0
#define ALPHA_C X1
#define ALPHA2 X10
#define ALPHA_C2 X11
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
// func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
TEXT ·ScalInc(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SRC // SRC = &x
MOVQ n+40(FP), LEN // LEN = len(x)
CMPQ LEN, $0
JE scal_end // if LEN == 0 { return }
MOVQ inc+48(FP), INC // INC = inc
SHLQ $4, INC // INC = INC * sizeof(complex128)
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
MOVAPS ALPHA, ALPHA_C
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
MOVAPS ALPHA_C, ALPHA_C2
MOVQ LEN, TAIL
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ scal_tail // if BX == 0 { goto scal_tail }
scal_loop: // do {
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS (SRC)(INC*1), X4
MOVUPS (SRC)(INC*2), X6
MOVUPS (SRC)(INC3*1), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
MULPD ALPHA_C, X2
MULPD ALPHA, X3
MULPD ALPHA_C2, X4
MULPD ALPHA2, X5
MULPD ALPHA_C, X6
MULPD ALPHA, X7
MULPD ALPHA_C2, X8
MULPD ALPHA2, X9
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
MOVUPS X3, (DST) // x[i] = X_(i+1)
MOVUPS X5, (DST)(INC*1)
MOVUPS X7, (DST)(INC*2)
MOVUPS X9, (DST)(INC3*1)
LEAQ (SRC)(INC*4), SRC // SRC = &(SRC[inc*4])
DECQ LEN
JNZ scal_loop // } while --BX > 0
scal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JE scal_end // if TAIL == 0 { return }
scal_tail_loop: // do {
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
MOVUPS X3, (DST) // x[i] = X_i
ADDQ INC, SRC // SRC = &(SRC[incX])
DECQ TAIL
JNZ scal_tail_loop // } while --TAIL > 0
scal_end:
RET

View File

@ -0,0 +1,96 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
package c128
// AxpyUnitary is
// for i, v := range x {
// y[i] += alpha * v
// }
func AxpyUnitary(alpha complex128, x, y []complex128)
// AxpyUnitaryTo is
// for i, v := range x {
// dst[i] = alpha*v + y[i]
// }
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
// AxpyInc is
// for i := 0; i < int(n); i++ {
// y[iy] += alpha * x[ix]
// ix += incX
// iy += incY
// }
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
// AxpyIncTo is
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha*x[ix] + y[iy]
// ix += incX
// iy += incY
// idst += incDst
// }
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
// DscalUnitary is
// for i, v := range x {
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
// }
func DscalUnitary(alpha float64, x []complex128)
// DscalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
// ix += inc
// }
func DscalInc(alpha float64, x []complex128, n, inc uintptr)
// ScalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] *= alpha
// ix += incX
// }
func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
// ScalUnitary is
// for i := range x {
// x[i] *= alpha
// }
func ScalUnitary(alpha complex128, x []complex128)
// DotcUnitary is
// for i, v := range x {
// sum += y[i] * cmplx.Conj(v)
// }
// return sum
func DotcUnitary(x, y []complex128) (sum complex128)
// DotcInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * cmplx.Conj(x[ix])
// ix += incX
// iy += incY
// }
// return sum
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
// DotuUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotuUnitary(x, y []complex128) (sum complex128)
// DotuInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)

View File

@ -0,0 +1,163 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !amd64 noasm appengine
package c128
import "math/cmplx"
// AxpyUnitary is
// for i, v := range x {
// y[i] += alpha * v
// }
func AxpyUnitary(alpha complex128, x, y []complex128) {
for i, v := range x {
y[i] += alpha * v
}
}
// AxpyUnitaryTo is
// for i, v := range x {
// dst[i] = alpha*v + y[i]
// }
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) {
for i, v := range x {
dst[i] = alpha*v + y[i]
}
}
// AxpyInc is
// for i := 0; i < int(n); i++ {
// y[iy] += alpha * x[ix]
// ix += incX
// iy += incY
// }
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
for i := 0; i < int(n); i++ {
y[iy] += alpha * x[ix]
ix += incX
iy += incY
}
}
// AxpyIncTo is
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha*x[ix] + y[iy]
// ix += incX
// iy += incY
// idst += incDst
// }
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
for i := 0; i < int(n); i++ {
dst[idst] = alpha*x[ix] + y[iy]
ix += incX
iy += incY
idst += incDst
}
}
// DscalUnitary is
// for i, v := range x {
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
// }
func DscalUnitary(alpha float64, x []complex128) {
for i, v := range x {
x[i] = complex(real(v)*alpha, imag(v)*alpha)
}
}
// DscalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
// ix += inc
// }
func DscalInc(alpha float64, x []complex128, n, inc uintptr) {
var ix uintptr
for i := 0; i < int(n); i++ {
x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
ix += inc
}
}
// ScalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] *= alpha
// ix += incX
// }
func ScalInc(alpha complex128, x []complex128, n, inc uintptr) {
var ix uintptr
for i := 0; i < int(n); i++ {
x[ix] *= alpha
ix += inc
}
}
// ScalUnitary is
// for i := range x {
// x[i] *= alpha
// }
func ScalUnitary(alpha complex128, x []complex128) {
for i := range x {
x[i] *= alpha
}
}
// DotcUnitary is
// for i, v := range x {
// sum += y[i] * cmplx.Conj(v)
// }
// return sum
func DotcUnitary(x, y []complex128) (sum complex128) {
for i, v := range x {
sum += y[i] * cmplx.Conj(v)
}
return sum
}
// DotcInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * cmplx.Conj(x[ix])
// ix += incX
// iy += incY
// }
// return sum
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
for i := 0; i < int(n); i++ {
sum += y[iy] * cmplx.Conj(x[ix])
ix += incX
iy += incY
}
return sum
}
// DotuUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotuUnitary(x, y []complex128) (sum complex128) {
for i, v := range x {
sum += y[i] * v
}
return sum
}
// DotuInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
for i := 0; i < int(n); i++ {
sum += y[iy] * x[ix]
ix += incX
iy += incY
}
return sum
}

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -0,0 +1,91 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define INC_X R8
#define INCx3_X R10
#define INC_Y R9
#define INCx3_Y R11
#define SUM X0
#define P_SUM X1
// func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
TEXT ·DdotInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ n+48(FP), LEN // LEN = n
PXOR SUM, SUM // SUM = 0
CMPQ LEN, $0
JE dot_end
MOVQ ix+72(FP), INC_X // INC_X = ix
MOVQ iy+80(FP), INC_Y // INC_Y = iy
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(float32)
SHLQ $2, INC_X
MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(float32)
SHLQ $2, INC_Y
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = LEN % 4
SHRQ $2, LEN // LEN = floor( LEN / 4 )
JZ dot_tail // if LEN == 0 { goto dot_tail }
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
dot_loop: // Loop unrolled 4x do {
CVTSS2SD (X_PTR), X2 // X_i = x[i:i+1]
CVTSS2SD (X_PTR)(INC_X*1), X3
CVTSS2SD (X_PTR)(INC_X*2), X4
CVTSS2SD (X_PTR)(INCx3_X*1), X5
CVTSS2SD (Y_PTR), X6 // X_j = y[i:i+1]
CVTSS2SD (Y_PTR)(INC_Y*1), X7
CVTSS2SD (Y_PTR)(INC_Y*2), X8
CVTSS2SD (Y_PTR)(INCx3_Y*1), X9
MULSD X6, X2 // X_i *= X_j
MULSD X7, X3
MULSD X8, X4
MULSD X9, X5
ADDSD X2, SUM // SUM += X_i
ADDSD X3, P_SUM
ADDSD X4, SUM
ADDSD X5, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDSD P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
CVTSS2SD (X_PTR), X2 // X2 = x[i]
CVTSS2SD (Y_PTR), X3 // X2 *= y[i]
MULSD X3, X2
ADDSD X2, SUM // SUM += X2
ADDQ INC_X, X_PTR // X_PTR += INC_X
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVSD SUM, sum+88(FP) // return SUM
RET

View File

@ -0,0 +1,110 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define HADDPD_SUM_SUM LONG $0xC07C0F66 // @ HADDPD X0, X0
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define IDX AX
#define SUM X0
#define P_SUM X1
// func DdotUnitary(x, y []float32) (sum float32)
TEXT ·DdotUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
PXOR SUM, SUM // psum = 0
CMPQ LEN, $0
JE dot_end
XORQ IDX, IDX
MOVQ Y_PTR, DX
ANDQ $0xF, DX // Align on 16-byte boundary for ADDPS
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
SUBQ $16, DX
dot_align: // Trim first value(s) in unaligned buffer do {
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
MULSD X3, X2
ADDSD X2, SUM // SUM += X2
INCQ IDX // IDX++
DECQ LEN
JZ dot_end // if --TAIL == 0 { return }
ADDQ $4, DX
JNZ dot_align // } while --LEN > 0
dot_no_trim:
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
MOVQ LEN, TAIL
ANDQ $0x7, TAIL // TAIL = LEN % 8
SHRQ $3, LEN // LEN = floor( LEN / 8 )
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
dot_loop: // Loop unrolled 8x do {
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
CVTPS2PD 8(X_PTR)(IDX*4), X3
CVTPS2PD 16(X_PTR)(IDX*4), X4
CVTPS2PD 24(X_PTR)(IDX*4), X5
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
CVTPS2PD 8(Y_PTR)(IDX*4), X7
CVTPS2PD 16(Y_PTR)(IDX*4), X8
CVTPS2PD 24(Y_PTR)(IDX*4), X9
MULPD X6, X2 // X_i *= X_j
MULPD X7, X3
MULPD X8, X4
MULPD X9, X5
ADDPD X2, SUM // SUM += X_i
ADDPD X3, P_SUM
ADDPD X4, SUM
ADDPD X5, P_SUM
ADDQ $8, IDX // IDX += 8
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail_start:
MOVQ TAIL, LEN
SHRQ $1, LEN
JZ dot_tail_one
dot_tail_two:
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
MULPD X6, X2 // X_i *= X_j
ADDPD X2, SUM // SUM += X_i
ADDQ $2, IDX // IDX += 2
DECQ LEN
JNZ dot_tail_two // } while --LEN > 0
ANDQ $1, TAIL
JZ dot_end
dot_tail_one:
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
MULSD X3, X2 // X2 *= X3
ADDSD X2, SUM // SUM += X2
dot_end:
HADDPD_SUM_SUM // SUM = \sum{ SUM[i] }
MOVSD SUM, sum+48(FP) // return SUM
RET

View File

@ -1,6 +1,6 @@
// Copyright ©2017 The gonum Authors. All rights reserved.
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package f32 provides float32 vector primitives.
package f32
package f32 // import "gonum.org/v1/gonum/internal/asm/f32"

View File

@ -0,0 +1,85 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define INC_X R8
#define INCx3_X R10
#define INC_Y R9
#define INCx3_Y R11
#define SUM X0
#define P_SUM X1
// func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
TEXT ·DotInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
PXOR SUM, SUM // SUM = 0
MOVQ n+48(FP), LEN // LEN = n
CMPQ LEN, $0
JE dot_end
MOVQ ix+72(FP), INC_X // INC_X = ix
MOVQ iy+80(FP), INC_Y // INC_Y = iy
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
MOVQ incX+56(FP), INC_X // INC_X := incX * sizeof(float32)
SHLQ $2, INC_X
MOVQ incY+64(FP), INC_Y // INC_Y := incY * sizeof(float32)
SHLQ $2, INC_Y
MOVQ LEN, TAIL
ANDQ $0x3, TAIL // TAIL = LEN % 4
SHRQ $2, LEN // LEN = floor( LEN / 4 )
JZ dot_tail // if LEN == 0 { goto dot_tail }
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
dot_loop: // Loop unrolled 4x do {
MOVSS (X_PTR), X2 // X_i = x[i:i+1]
MOVSS (X_PTR)(INC_X*1), X3
MOVSS (X_PTR)(INC_X*2), X4
MOVSS (X_PTR)(INCx3_X*1), X5
MULSS (Y_PTR), X2 // X_i *= y[i:i+1]
MULSS (Y_PTR)(INC_Y*1), X3
MULSS (Y_PTR)(INC_Y*2), X4
MULSS (Y_PTR)(INCx3_Y*1), X5
ADDSS X2, SUM // SUM += X_i
ADDSS X3, P_SUM
ADDSS X4, SUM
ADDSS X5, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDSS P_SUM, SUM // P_SUM += SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVSS (X_PTR), X2 // X2 = x[i]
MULSS (Y_PTR), X2 // X2 *= y[i]
ADDSS X2, SUM // SUM += X2
ADDQ INC_X, X_PTR // X_PTR += INC_X
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVSS SUM, sum+88(FP) // return SUM
RET

View File

@ -0,0 +1,106 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine
#include "textflag.h"
#define HADDPS_SUM_SUM LONG $0xC07C0FF2 // @ HADDPS X0, X0
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define IDX AX
#define SUM X0
#define P_SUM X1
// func DotUnitary(x, y []float32) (sum float32)
TEXT ·DotUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
PXOR SUM, SUM // SUM = 0
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
CMPQ LEN, $0
JE dot_end
XORQ IDX, IDX
MOVQ Y_PTR, DX
ANDQ $0xF, DX // Align on 16-byte boundary for MULPS
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
SUBQ $16, DX
dot_align: // Trim first value(s) in unaligned buffer do {
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
ADDSS X2, SUM // SUM += X2
INCQ IDX // IDX++
DECQ LEN
JZ dot_end // if --TAIL == 0 { return }
ADDQ $4, DX
JNZ dot_align // } while --DX > 0
dot_no_trim:
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
MOVQ LEN, TAIL
ANDQ $0xF, TAIL // TAIL = LEN % 16
SHRQ $4, LEN // LEN = floor( LEN / 16 )
JZ dot_tail4_start // if LEN == 0 { goto dot_tail4_start }
dot_loop: // Loop unrolled 16x do {
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
MOVUPS 16(X_PTR)(IDX*4), X3
MOVUPS 32(X_PTR)(IDX*4), X4
MOVUPS 48(X_PTR)(IDX*4), X5
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
MULPS 16(Y_PTR)(IDX*4), X3
MULPS 32(Y_PTR)(IDX*4), X4
MULPS 48(Y_PTR)(IDX*4), X5
ADDPS X2, SUM // SUM += X_i
ADDPS X3, P_SUM
ADDPS X4, SUM
ADDPS X5, P_SUM
ADDQ $16, IDX // IDX += 16
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPS P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail4_start: // Reset loop counter for 4-wide tail loop
MOVQ TAIL, LEN // LEN = floor( TAIL / 4 )
SHRQ $2, LEN
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
dot_tail4_loop: // Loop unrolled 4x do {
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
ADDPS X2, SUM // SUM += X_i
ADDQ $4, IDX // i += 4
DECQ LEN
JNZ dot_tail4_loop // } while --LEN > 0
dot_tail_start: // Reset loop counter for 1-wide tail loop
ANDQ $3, TAIL // TAIL = TAIL % 4
JZ dot_end // if TAIL == 0 { return }
dot_tail: // do {
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
ADDSS X2, SUM // psum += X2
INCQ IDX // IDX++
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
HADDPS_SUM_SUM // SUM = \sum{ SUM[i] }
HADDPS_SUM_SUM
MOVSS SUM, sum+48(FP) // return SUM
RET

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
@ -34,3 +34,35 @@ func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
// idst += incDst
// }
func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
// DdotUnitary is
// for i, v := range x {
// sum += float64(y[i]) * float64(v)
// }
// return
func DdotUnitary(x, y []float32) (sum float64)
// DdotInc is
// for i := 0; i < int(n); i++ {
// sum += float64(y[iy]) * float64(x[ix])
// ix += incX
// iy += incY
// }
// return
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
// DotUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotUnitary(x, y []float32) (sum float32)
// DotInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
@ -55,3 +55,59 @@ func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float3
idst += incDst
}
}
// DotUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotUnitary(x, y []float32) (sum float32) {
for i, v := range x {
sum += y[i] * v
}
return sum
}
// DotInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
for i := 0; i < int(n); i++ {
sum += y[iy] * x[ix]
ix += incX
iy += incY
}
return sum
}
// DdotUnitary is
// for i, v := range x {
// sum += float64(y[i]) * float64(v)
// }
// return
func DdotUnitary(x, y []float32) (sum float64) {
for i, v := range x {
sum += float64(y[i]) * float64(v)
}
return
}
// DdotInc is
// for i := 0; i < int(n); i++ {
// sum += float64(y[iy]) * float64(x[ix])
// ix += incX
// iy += incY
// }
// return
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
for i := 0; i < int(n); i++ {
sum += float64(y[iy]) * float64(x[ix])
ix += incX
iy += incY
}
return
}

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,6 +1,6 @@
// Copyright ©2017 The gonum Authors. All rights reserved.
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package f64 provides float64 vector primitives.
package f64
package f64 // import "gonum.org/v1/gonum/internal/asm/f64"

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

View File

@ -1,4 +1,4 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

View File

@ -1,4 +1,4 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

Some files were not shown because too many files have changed in this diff Show More