mirror of https://github.com/etcd-io/dbtester.git
vendor: update import paths to "gonum.org/v1"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
This commit is contained in:
parent
f4955798bc
commit
e7294311a0
|
|
@ -99,70 +99,6 @@
|
|||
revision = "1e59b77b52bf8e4b449a57e6f79f21226d571845"
|
||||
source = "https://github.com/golang/protobuf"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/gonum/blas"
|
||||
packages = [
|
||||
".",
|
||||
"blas64",
|
||||
"native",
|
||||
"native/internal/math32"
|
||||
]
|
||||
revision = "37e82626499e1df7c54aeaba0959fd6e7e8dc1e4"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/gonum/floats"
|
||||
packages = ["."]
|
||||
revision = "f74b330d45c56584a6ea7a27f5c64ea2900631e9"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/gonum/internal"
|
||||
packages = [
|
||||
"asm/f32",
|
||||
"asm/f64"
|
||||
]
|
||||
revision = "e57e4534cf9b3b00ef6c0175f59d8d2d34f60914"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/gonum/lapack"
|
||||
packages = [
|
||||
".",
|
||||
"lapack64",
|
||||
"native"
|
||||
]
|
||||
revision = "5ed4b826becd1807e09377508f51756586d1a98c"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/gonum/matrix"
|
||||
packages = [
|
||||
".",
|
||||
"mat64"
|
||||
]
|
||||
revision = "dd6034299e4242c9f0ea36735e6d4264dfcb3f9f"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/gonum/plot"
|
||||
packages = [
|
||||
".",
|
||||
"palette",
|
||||
"plotter",
|
||||
"plotutil",
|
||||
"tools/bezier",
|
||||
"vg",
|
||||
"vg/draw",
|
||||
"vg/fonts",
|
||||
"vg/vgeps",
|
||||
"vg/vgimg",
|
||||
"vg/vgpdf",
|
||||
"vg/vgsvg"
|
||||
]
|
||||
revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
|
||||
source = "https://github.com/gonum/plot"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/googleapis/gax-go"
|
||||
packages = ["."]
|
||||
|
|
@ -344,6 +280,44 @@
|
|||
revision = "6dc17368e09b0e8634d71cac8168d853e869a0c7"
|
||||
source = "https://github.com/golang/time"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "gonum.org/v1/gonum"
|
||||
packages = [
|
||||
"blas",
|
||||
"blas/blas64",
|
||||
"blas/gonum",
|
||||
"floats",
|
||||
"internal/asm/c128",
|
||||
"internal/asm/f32",
|
||||
"internal/asm/f64",
|
||||
"internal/math32",
|
||||
"lapack",
|
||||
"lapack/gonum",
|
||||
"lapack/lapack64",
|
||||
"mat"
|
||||
]
|
||||
revision = "69fc04c7c31754cf196d3bcc70c3bc0eec9da1b7"
|
||||
|
||||
[[projects]]
|
||||
name = "gonum.org/v1/plot"
|
||||
packages = [
|
||||
".",
|
||||
"palette",
|
||||
"plotter",
|
||||
"plotutil",
|
||||
"tools/bezier",
|
||||
"vg",
|
||||
"vg/draw",
|
||||
"vg/fonts",
|
||||
"vg/vgeps",
|
||||
"vg/vgimg",
|
||||
"vg/vgpdf",
|
||||
"vg/vgsvg"
|
||||
]
|
||||
revision = "feab214a240f4312b98ab52baf662b55ff1ee377"
|
||||
source = "https://github.com/gonum/plot"
|
||||
|
||||
[[projects]]
|
||||
name = "google.golang.org/api"
|
||||
packages = [
|
||||
|
|
@ -421,6 +395,6 @@
|
|||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "057a961c21585211116e57f99b60e5bdd28cfe46411715ec4e13dbcf12ddd876"
|
||||
inputs-digest = "21abf25cf507cffab0266c5836fb8acc3cb14f96b0f3fba49e0c55b85fedf5e5"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
||||
|
|
|
|||
|
|
@ -88,9 +88,9 @@
|
|||
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/gonum/plot"
|
||||
name = "gonum.org/v1/plot"
|
||||
source = "https://github.com/gonum/plot"
|
||||
revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
|
||||
revision = "feab214a240f4312b98ab52baf662b55ff1ee377"
|
||||
|
||||
|
||||
[[constraint]]
|
||||
|
|
|
|||
|
|
@ -19,11 +19,11 @@ import (
|
|||
|
||||
"github.com/coreos/dbtester/dbtesterpb"
|
||||
|
||||
"github.com/gonum/plot"
|
||||
"github.com/gonum/plot/plotter"
|
||||
"github.com/gonum/plot/plotutil"
|
||||
"github.com/gonum/plot/vg"
|
||||
"github.com/gyuho/dataframe"
|
||||
"gonum.org/v1/plot"
|
||||
"gonum.org/v1/plot/plotter"
|
||||
"gonum.org/v1/plot/plotutil"
|
||||
"gonum.org/v1/plot/vg"
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ import (
|
|||
"image/color"
|
||||
"sort"
|
||||
|
||||
"github.com/gonum/plot/plotutil"
|
||||
"gonum.org/v1/plot/plotutil"
|
||||
)
|
||||
|
||||
// IsValidDatabaseID returns false if the database id is not supported.
|
||||
|
|
|
|||
|
|
@ -1,155 +0,0 @@
|
|||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
func newGeneral64(r, c int) general64 {
|
||||
return general64{
|
||||
data: make([]float64, r*c),
|
||||
rows: r,
|
||||
cols: c,
|
||||
stride: c,
|
||||
}
|
||||
}
|
||||
|
||||
type general64 struct {
|
||||
data []float64
|
||||
rows, cols int
|
||||
stride int
|
||||
}
|
||||
|
||||
// adds element-wise into receiver. rows and columns must match
|
||||
func (g general64) add(h general64) {
|
||||
if debug {
|
||||
if g.rows != h.rows {
|
||||
panic("blas: row size mismatch")
|
||||
}
|
||||
if g.cols != h.cols {
|
||||
panic("blas: col size mismatch")
|
||||
}
|
||||
}
|
||||
for i := 0; i < g.rows; i++ {
|
||||
gtmp := g.data[i*g.stride : i*g.stride+g.cols]
|
||||
for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
|
||||
gtmp[j] += v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// at returns the value at the ith row and jth column. For speed reasons, the
|
||||
// rows and columns are not bounds checked.
|
||||
func (g general64) at(i, j int) float64 {
|
||||
if debug {
|
||||
if i < 0 || i >= g.rows {
|
||||
panic("blas: row out of bounds")
|
||||
}
|
||||
if j < 0 || j >= g.cols {
|
||||
panic("blas: col out of bounds")
|
||||
}
|
||||
}
|
||||
return g.data[i*g.stride+j]
|
||||
}
|
||||
|
||||
func (g general64) check(c byte) error {
|
||||
if g.rows < 0 {
|
||||
return errors.New("blas: rows < 0")
|
||||
}
|
||||
if g.cols < 0 {
|
||||
return errors.New("blas: cols < 0")
|
||||
}
|
||||
if g.stride < 1 {
|
||||
return errors.New("blas: stride < 1")
|
||||
}
|
||||
if g.stride < g.cols {
|
||||
return errors.New("blas: illegal stride")
|
||||
}
|
||||
if (g.rows-1)*g.stride+g.cols > len(g.data) {
|
||||
return fmt.Errorf("blas: index of %c out of range", c)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g general64) clone() general64 {
|
||||
data := make([]float64, len(g.data))
|
||||
copy(data, g.data)
|
||||
return general64{
|
||||
data: data,
|
||||
rows: g.rows,
|
||||
cols: g.cols,
|
||||
stride: g.stride,
|
||||
}
|
||||
}
|
||||
|
||||
// assumes they are the same size
|
||||
func (g general64) copy(h general64) {
|
||||
if debug {
|
||||
if g.rows != h.rows {
|
||||
panic("blas: row mismatch")
|
||||
}
|
||||
if g.cols != h.cols {
|
||||
panic("blas: col mismatch")
|
||||
}
|
||||
}
|
||||
for k := 0; k < g.rows; k++ {
|
||||
copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
|
||||
}
|
||||
}
|
||||
|
||||
func (g general64) equal(a general64) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if a.data[i] != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
/*
|
||||
// print is to aid debugging. Commented out to avoid fmt import
|
||||
func (g general64) print() {
|
||||
fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
|
||||
for i := 0; i < g.rows; i++ {
|
||||
fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
func (g general64) view(i, j, r, c int) general64 {
|
||||
if debug {
|
||||
if i < 0 || i+r > g.rows {
|
||||
panic("blas: row out of bounds")
|
||||
}
|
||||
if j < 0 || j+c > g.cols {
|
||||
panic("blas: col out of bounds")
|
||||
}
|
||||
}
|
||||
return general64{
|
||||
data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
|
||||
rows: r,
|
||||
cols: c,
|
||||
stride: g.stride,
|
||||
}
|
||||
}
|
||||
|
||||
func (g general64) equalWithinAbs(a general64, tol float64) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if math.Abs(a.data[i]-v) > tol {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
@ -1,157 +0,0 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
math "github.com/gonum/blas/native/internal/math32"
|
||||
)
|
||||
|
||||
func newGeneral32(r, c int) general32 {
|
||||
return general32{
|
||||
data: make([]float32, r*c),
|
||||
rows: r,
|
||||
cols: c,
|
||||
stride: c,
|
||||
}
|
||||
}
|
||||
|
||||
type general32 struct {
|
||||
data []float32
|
||||
rows, cols int
|
||||
stride int
|
||||
}
|
||||
|
||||
// adds element-wise into receiver. rows and columns must match
|
||||
func (g general32) add(h general32) {
|
||||
if debug {
|
||||
if g.rows != h.rows {
|
||||
panic("blas: row size mismatch")
|
||||
}
|
||||
if g.cols != h.cols {
|
||||
panic("blas: col size mismatch")
|
||||
}
|
||||
}
|
||||
for i := 0; i < g.rows; i++ {
|
||||
gtmp := g.data[i*g.stride : i*g.stride+g.cols]
|
||||
for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
|
||||
gtmp[j] += v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// at returns the value at the ith row and jth column. For speed reasons, the
|
||||
// rows and columns are not bounds checked.
|
||||
func (g general32) at(i, j int) float32 {
|
||||
if debug {
|
||||
if i < 0 || i >= g.rows {
|
||||
panic("blas: row out of bounds")
|
||||
}
|
||||
if j < 0 || j >= g.cols {
|
||||
panic("blas: col out of bounds")
|
||||
}
|
||||
}
|
||||
return g.data[i*g.stride+j]
|
||||
}
|
||||
|
||||
func (g general32) check(c byte) error {
|
||||
if g.rows < 0 {
|
||||
return errors.New("blas: rows < 0")
|
||||
}
|
||||
if g.cols < 0 {
|
||||
return errors.New("blas: cols < 0")
|
||||
}
|
||||
if g.stride < 1 {
|
||||
return errors.New("blas: stride < 1")
|
||||
}
|
||||
if g.stride < g.cols {
|
||||
return errors.New("blas: illegal stride")
|
||||
}
|
||||
if (g.rows-1)*g.stride+g.cols > len(g.data) {
|
||||
return fmt.Errorf("blas: index of %c out of range", c)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g general32) clone() general32 {
|
||||
data := make([]float32, len(g.data))
|
||||
copy(data, g.data)
|
||||
return general32{
|
||||
data: data,
|
||||
rows: g.rows,
|
||||
cols: g.cols,
|
||||
stride: g.stride,
|
||||
}
|
||||
}
|
||||
|
||||
// assumes they are the same size
|
||||
func (g general32) copy(h general32) {
|
||||
if debug {
|
||||
if g.rows != h.rows {
|
||||
panic("blas: row mismatch")
|
||||
}
|
||||
if g.cols != h.cols {
|
||||
panic("blas: col mismatch")
|
||||
}
|
||||
}
|
||||
for k := 0; k < g.rows; k++ {
|
||||
copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
|
||||
}
|
||||
}
|
||||
|
||||
func (g general32) equal(a general32) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if a.data[i] != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
/*
|
||||
// print is to aid debugging. Commented out to avoid fmt import
|
||||
func (g general32) print() {
|
||||
fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
|
||||
for i := 0; i < g.rows; i++ {
|
||||
fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
func (g general32) view(i, j, r, c int) general32 {
|
||||
if debug {
|
||||
if i < 0 || i+r > g.rows {
|
||||
panic("blas: row out of bounds")
|
||||
}
|
||||
if j < 0 || j+c > g.cols {
|
||||
panic("blas: col out of bounds")
|
||||
}
|
||||
}
|
||||
return general32{
|
||||
data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
|
||||
rows: r,
|
||||
cols: c,
|
||||
stride: g.stride,
|
||||
}
|
||||
}
|
||||
|
||||
func (g general32) equalWithinAbs(a general32, tol float32) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if math.Abs(a.data[i]-v) > tol {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package f32
|
||||
|
||||
// DdotUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += float64(y[i]) * float64(v)
|
||||
// }
|
||||
// return
|
||||
func DdotUnitary(x, y []float32) (sum float64) {
|
||||
for i, v := range x {
|
||||
sum += float64(y[i]) * float64(v)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DdotInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += float64(y[iy]) * float64(x[ix])
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return
|
||||
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
sum += float64(y[iy]) * float64(x[ix])
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package f32
|
||||
|
||||
// DotUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += y[i] * v
|
||||
// }
|
||||
// return sum
|
||||
func DotUnitary(x, y []float32) (sum float32) {
|
||||
for i, v := range x {
|
||||
sum += y[i] * v
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// DotInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += y[iy] * x[ix]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return sum
|
||||
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
sum += y[iy] * x[ix]
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
// Generated by running
|
||||
// go generate github.com/gonum/matrix
|
||||
// DO NOT EDIT.
|
||||
|
||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package matrix provides common error handling mechanisms for matrix operations
|
||||
// in mat64 and cmat128.
|
||||
//
|
||||
// Overview
|
||||
//
|
||||
// This section provides a quick overview of the matrix package. The following
|
||||
// sections provide more in depth commentary.
|
||||
//
|
||||
// matrix provides:
|
||||
// - Error type definitions
|
||||
// - Error recovery mechanisms
|
||||
// - Common constants used by mat64 and cmat128
|
||||
//
|
||||
// Errors
|
||||
//
|
||||
// The mat64 and cmat128 matrix packages share a common set of errors
|
||||
// provided by matrix via the matrix.Error type.
|
||||
//
|
||||
// Errors are either returned directly or used as the parameter of a panic
|
||||
// depending on the class of error encountered. Returned errors indicate
|
||||
// that a call was not able to complete successfully while panics generally
|
||||
// indicate a programmer or unrecoverable error.
|
||||
//
|
||||
// Examples of each type are found in the mat64 Solve methods, which find
|
||||
// x such that A*x = b.
|
||||
//
|
||||
// An error value is returned from the function or method when the operation
|
||||
// can meaningfully fail. The Solve operation cannot complete if A is
|
||||
// singular. However, determining the singularity of A is most easily
|
||||
// discovered during the Solve procedure itself and is a valid result from
|
||||
// the operation, so in this case an error is returned.
|
||||
//
|
||||
// A function will panic when the input parameters are inappropriate for
|
||||
// the function. In Solve, for example, the number of rows of each input
|
||||
// matrix must be equal because of the rules of matrix multiplication.
|
||||
// Similarly, for solving A*x = b, a non-zero receiver must have the same
|
||||
// number of rows as A has columns and must have the same number of columns
|
||||
// as b. In all cases where a function will panic, conditions that would
|
||||
// lead to a panic can easily be checked prior to a call.
|
||||
//
|
||||
// Error Recovery
|
||||
//
|
||||
// When a matrix.Error is the parameter of a panic, the panic can be
|
||||
// recovered by a Maybe function, which will then return the error.
|
||||
// Panics that are not of type matrix.Error are re-panicked by the
|
||||
// Maybe functions.
|
||||
//
|
||||
// Invariants
|
||||
//
|
||||
// Matrix input arguments to functions are never directly modified. If an operation
|
||||
// changes Matrix data, the mutated matrix will be the receiver of a function.
|
||||
//
|
||||
// For convenience, a matrix may be used as both a receiver and as an input, e.g.
|
||||
// a.Pow(a, 6)
|
||||
// v.SolveVec(a.T(), v)
|
||||
// though in many cases this will cause an allocation (see Element Aliasing).
|
||||
// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
|
||||
//
|
||||
// Element Aliasing
|
||||
//
|
||||
// Most methods in the matrix packages modify receiver data. It is forbidden for the modified
|
||||
// data region of the receiver to overlap the used data area of the input
|
||||
// arguments. The exception to this rule is when the method receiver is equal to one
|
||||
// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
|
||||
//
|
||||
// This prohibition is to help avoid subtle mistakes when the method needs to read
|
||||
// from and write to the same data region. There are ways to make mistakes using the
|
||||
// matrix API, and matrix functions will detect and complain about those.
|
||||
// There are many ways to make mistakes by excursion from the matrix API via
|
||||
// interaction with raw matrix values.
|
||||
//
|
||||
// If you need to read the rest of this section to understand the behavior of
|
||||
// your program, you are being clever. Don't be clever. If you must be clever,
|
||||
// blas64/cblas128 and lapack64/clapack128 may be used to call the behavior directly.
|
||||
//
|
||||
// The matrix packages will use the following rules to detect overlap between the receiver and one
|
||||
// of the inputs:
|
||||
// - the input implements one of the Raw methods, and
|
||||
// - the Raw type matches that of the receiver or
|
||||
// one is a RawMatrixer and the other is a RawVectorer, and
|
||||
// - the address ranges of the backing data slices overlap, and
|
||||
// - the strides differ or there is an overlap in the used data elements.
|
||||
// If such an overlap is detected, the method will panic.
|
||||
//
|
||||
// The following cases will not panic:
|
||||
// - the data slices do not overlap,
|
||||
// - there is pointer identity between the receiver and input values after
|
||||
// the value has been untransposed if necessary.
|
||||
//
|
||||
// The matrix packages will not attempt to detect element overlap if the input does not implement a
|
||||
// Raw method, or if the Raw method differs from that of the receiver except when a
|
||||
// conversion has occurred through a matrix API function. Method behavior is undefined
|
||||
// if there is undetected overlap.
|
||||
//
|
||||
package matrix
|
||||
|
|
@ -1,343 +0,0 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build ignore
|
||||
|
||||
// gendoc creates the matrix, mat64 and cmat128 package doc comments.
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"text/template"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var docs = template.Must(template.New("docs").Funcs(funcs).Parse(`{{define "common"}}// Generated by running
|
||||
// go generate github.com/gonum/matrix
|
||||
// DO NOT EDIT.
|
||||
|
||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package {{.Name}} provides {{.Provides}}
|
||||
//
|
||||
// Overview
|
||||
//
|
||||
// This section provides a quick overview of the {{.Name}} package. The following
|
||||
// sections provide more in depth commentary.
|
||||
//
|
||||
{{.Overview}}
|
||||
//{{end}}
|
||||
{{define "interfaces"}}// The Matrix Interfaces
|
||||
//
|
||||
// The Matrix interface is the common link between the concrete types. The Matrix
|
||||
// interface is defined by three functions: Dims, which returns the dimensions
|
||||
// of the Matrix, At, which returns the element in the specified location, and
|
||||
// T for returning a Transpose (discussed later). All of the concrete types can
|
||||
// perform these behaviors and so implement the interface. Methods and functions
|
||||
// are designed to use this interface, so in particular the method
|
||||
// func (m *Dense) Mul(a, b Matrix)
|
||||
// constructs a *Dense from the result of a multiplication with any Matrix types,
|
||||
// not just *Dense. Where more restrictive requirements must be met, there are also the
|
||||
// Symmetric and Triangular interfaces. For example, in
|
||||
// func (s *SymDense) AddSym(a, b Symmetric)
|
||||
// the Symmetric interface guarantees a symmetric result.
|
||||
//
|
||||
// Transposes
|
||||
//
|
||||
// The T method is used for transposition. For example, c.Mul(a.T(), b) computes
|
||||
// c = a^T * b. The {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}} types implement this method using an implicit transpose —
|
||||
// see the Transpose type for more details. Note that some operations have a
|
||||
// transpose as part of their definition, as in *SymDense.SymOuterK.
|
||||
//{{end}}
|
||||
{{define "factorization"}}// Matrix Factorization
|
||||
//
|
||||
// Matrix factorizations, such as the LU decomposition, typically have their own
|
||||
// specific data storage, and so are each implemented as a specific type. The
|
||||
// factorization can be computed through a call to Factorize
|
||||
// var lu {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}}.LU
|
||||
// lu.Factorize(a)
|
||||
// The elements of the factorization can be extracted through methods on the
|
||||
// appropriate type, i.e. *TriDense.LFromLU and *TriDense.UFromLU. Alternatively,
|
||||
// they can be used directly, as in *Dense.SolveLU. Some factorizations can be
|
||||
// updated directly, without needing to update the original matrix and refactorize,
|
||||
// as in *LU.RankOne.
|
||||
//{{end}}
|
||||
{{define "blas"}}// BLAS and LAPACK
|
||||
//
|
||||
// BLAS and LAPACK are the standard APIs for linear algebra routines. Many
|
||||
// operations in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} are implemented using calls to the wrapper functions
|
||||
// in gonum/blas/{{.BLAS|alts}} and gonum/lapack/{{.LAPACK|alts}}. By default, {{.BLAS|join "/"}} and
|
||||
// {{.LAPACK|join "/"}} call the native Go implementations of the routines. Alternatively,
|
||||
// it is possible to use C-based implementations of the APIs through the respective
|
||||
// cgo packages and "Use" functions. The Go implementation of LAPACK makes calls
|
||||
// through {{.BLAS|join "/"}}, so if a cgo BLAS implementation is registered, the {{.LAPACK|join "/"}}
|
||||
// calls will be partially executed in Go and partially executed in C.
|
||||
//{{end}}
|
||||
{{define "switching"}}// Type Switching
|
||||
//
|
||||
// The Matrix abstraction enables efficiency as well as interoperability. Go's
|
||||
// type reflection capabilities are used to choose the most efficient routine
|
||||
// given the specific concrete types. For example, in
|
||||
// c.Mul(a, b)
|
||||
// if a and b both implement RawMatrixer, that is, they can be represented as a
|
||||
// {{.BLAS|alts}}.General, {{.BLAS|alts}}.Gemm (general matrix multiplication) is called, while
|
||||
// instead if b is a RawSymmetricer {{.BLAS|alts}}.Symm is used (general-symmetric
|
||||
// multiplication), and if b is a *Vector {{.BLAS|alts}}.Gemv is used.
|
||||
//
|
||||
// There are many possible type combinations and special cases. No specific guarantees
|
||||
// are made about the performance of any method, and in particular, note that an
|
||||
// abstract matrix type may be copied into a concrete type of the corresponding
|
||||
// value. If there are specific special cases that are needed, please submit a
|
||||
// pull-request or file an issue.
|
||||
//{{end}}
|
||||
{{define "invariants"}}// Invariants
|
||||
//
|
||||
// Matrix input arguments to functions are never directly modified. If an operation
|
||||
// changes Matrix data, the mutated matrix will be the receiver of a function.
|
||||
//
|
||||
// For convenience, a matrix may be used as both a receiver and as an input, e.g.
|
||||
// a.Pow(a, 6)
|
||||
// v.SolveVec(a.T(), v)
|
||||
// though in many cases this will cause an allocation (see Element Aliasing).
|
||||
// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
|
||||
//{{end}}
|
||||
{{define "aliasing"}}// Element Aliasing
|
||||
//
|
||||
// Most methods in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} modify receiver data. It is forbidden for the modified
|
||||
// data region of the receiver to overlap the used data area of the input
|
||||
// arguments. The exception to this rule is when the method receiver is equal to one
|
||||
// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
|
||||
//
|
||||
// This prohibition is to help avoid subtle mistakes when the method needs to read
|
||||
// from and write to the same data region. There are ways to make mistakes using the
|
||||
// {{.Name}} API, and {{.Name}} functions will detect and complain about those.
|
||||
// There are many ways to make mistakes by excursion from the {{.Name}} API via
|
||||
// interaction with raw matrix values.
|
||||
//
|
||||
// If you need to read the rest of this section to understand the behavior of
|
||||
// your program, you are being clever. Don't be clever. If you must be clever,
|
||||
// {{.BLAS|join "/"}} and {{.LAPACK|join "/"}} may be used to call the behavior directly.
|
||||
//
|
||||
// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will use the following rules to detect overlap between the receiver and one
|
||||
// of the inputs:
|
||||
// - the input implements one of the Raw methods, and
|
||||
// - the Raw type matches that of the receiver or
|
||||
// one is a RawMatrixer and the other is a RawVectorer, and
|
||||
// - the address ranges of the backing data slices overlap, and
|
||||
// - the strides differ or there is an overlap in the used data elements.
|
||||
// If such an overlap is detected, the method will panic.
|
||||
//
|
||||
// The following cases will not panic:
|
||||
// - the data slices do not overlap,
|
||||
// - there is pointer identity between the receiver and input values after
|
||||
// the value has been untransposed if necessary.
|
||||
//
|
||||
// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will not attempt to detect element overlap if the input does not implement a
|
||||
// Raw method, or if the Raw method differs from that of the receiver except when a
|
||||
// conversion has occurred through a {{.Name}} API function. Method behavior is undefined
|
||||
// if there is undetected overlap.
|
||||
//{{end}}`))
|
||||
|
||||
type Package struct {
|
||||
path string
|
||||
|
||||
Name string
|
||||
Provides string
|
||||
Description string
|
||||
ExamplePackage string
|
||||
Overview string
|
||||
|
||||
BLAS []string
|
||||
LAPACK []string
|
||||
|
||||
template string
|
||||
}
|
||||
|
||||
var pkgs = []Package{
|
||||
{
|
||||
path: ".",
|
||||
|
||||
Name: "matrix",
|
||||
Description: "the matrix packages",
|
||||
Provides: `common error handling mechanisms for matrix operations
|
||||
// in mat64 and cmat128.`,
|
||||
ExamplePackage: "mat64",
|
||||
|
||||
Overview: `// matrix provides:
|
||||
// - Error type definitions
|
||||
// - Error recovery mechanisms
|
||||
// - Common constants used by mat64 and cmat128
|
||||
//
|
||||
// Errors
|
||||
//
|
||||
// The mat64 and cmat128 matrix packages share a common set of errors
|
||||
// provided by matrix via the matrix.Error type.
|
||||
//
|
||||
// Errors are either returned directly or used as the parameter of a panic
|
||||
// depending on the class of error encountered. Returned errors indicate
|
||||
// that a call was not able to complete successfully while panics generally
|
||||
// indicate a programmer or unrecoverable error.
|
||||
//
|
||||
// Examples of each type are found in the mat64 Solve methods, which find
|
||||
// x such that A*x = b.
|
||||
//
|
||||
// An error value is returned from the function or method when the operation
|
||||
// can meaningfully fail. The Solve operation cannot complete if A is
|
||||
// singular. However, determining the singularity of A is most easily
|
||||
// discovered during the Solve procedure itself and is a valid result from
|
||||
// the operation, so in this case an error is returned.
|
||||
//
|
||||
// A function will panic when the input parameters are inappropriate for
|
||||
// the function. In Solve, for example, the number of rows of each input
|
||||
// matrix must be equal because of the rules of matrix multiplication.
|
||||
// Similarly, for solving A*x = b, a non-zero receiver must have the same
|
||||
// number of rows as A has columns and must have the same number of columns
|
||||
// as b. In all cases where a function will panic, conditions that would
|
||||
// lead to a panic can easily be checked prior to a call.
|
||||
//
|
||||
// Error Recovery
|
||||
//
|
||||
// When a matrix.Error is the parameter of a panic, the panic can be
|
||||
// recovered by a Maybe function, which will then return the error.
|
||||
// Panics that are not of type matrix.Error are re-panicked by the
|
||||
// Maybe functions.`,
|
||||
BLAS: []string{"blas64", "cblas128"},
|
||||
LAPACK: []string{"lapack64", "clapack128"},
|
||||
|
||||
template: `{{template "common" .}}
|
||||
{{template "invariants" .}}
|
||||
{{template "aliasing" .}}
|
||||
package {{.Name}}
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "mat64",
|
||||
|
||||
Name: "mat64",
|
||||
Provides: `implementations of float64 matrix structures and
|
||||
// linear algebra operations on them.`,
|
||||
|
||||
Overview: `// mat64 provides:
|
||||
// - Interfaces for Matrix classes (Matrix, Symmetric, Triangular)
|
||||
// - Concrete implementations (Dense, SymDense, TriDense)
|
||||
// - Methods and functions for using matrix data (Add, Trace, SymRankOne)
|
||||
// - Types for constructing and using matrix factorizations (QR, LU)
|
||||
//
|
||||
// A matrix may be constructed through the corresponding New function. If no
|
||||
// backing array is provided the matrix will be initialized to all zeros.
|
||||
// // Allocate a zeroed matrix of size 3×5
|
||||
// zero := mat64.NewDense(3, 5, nil)
|
||||
// If a backing data slice is provided, the matrix will have those elements.
|
||||
// Matrices are all stored in row-major format.
|
||||
// // Generate a 6×6 matrix of random values.
|
||||
// data := make([]float64, 36)
|
||||
// for i := range data {
|
||||
// data[i] = rand.NormFloat64()
|
||||
// }
|
||||
// a := mat64.NewDense(6, 6, data)
|
||||
//
|
||||
// Operations involving matrix data are implemented as functions when the values
|
||||
// of the matrix remain unchanged
|
||||
// tr := mat64.Trace(a)
|
||||
// and are implemented as methods when the operation modifies the receiver.
|
||||
// zero.Copy(a)
|
||||
//
|
||||
// Receivers must be the correct size for the matrix operations, otherwise the
|
||||
// operation will panic. As a special case for convenience, a zero-sized matrix
|
||||
// will be modified to have the correct size, allocating data if necessary.
|
||||
// var c mat64.Dense // construct a new zero-sized matrix
|
||||
// c.Mul(a, a) // c is automatically adjusted to be 6×6`,
|
||||
|
||||
BLAS: []string{"blas64"},
|
||||
LAPACK: []string{"lapack64"},
|
||||
|
||||
template: `{{template "common" .}}
|
||||
{{template "interfaces" .}}
|
||||
{{template "factorization" .}}
|
||||
{{template "blas" .}}
|
||||
{{template "switching" .}}
|
||||
{{template "invariants" .}}
|
||||
{{template "aliasing" .}}
|
||||
package {{.Name}}
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "cmat128",
|
||||
|
||||
Name: "cmat128",
|
||||
Provides: `implementations of complex128 matrix structures and
|
||||
// linear algebra operations on them.`,
|
||||
|
||||
Overview: `// cmat128 provides:
|
||||
// - Interfaces for a complex Matrix`,
|
||||
|
||||
BLAS: []string{"cblas128"},
|
||||
LAPACK: []string{"clapack128"},
|
||||
|
||||
template: `{{template "common" . }}
|
||||
{{template "blas" .}}
|
||||
{{template "switching" .}}
|
||||
{{template "invariants" .}}
|
||||
{{template "aliasing" .}}
|
||||
package {{.Name}}
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
var funcs = template.FuncMap{
|
||||
"sentence": sentence,
|
||||
"alts": alts,
|
||||
"join": join,
|
||||
}
|
||||
|
||||
// sentence converts a string to sentence case where the string is the prefix of the sentence.
|
||||
func sentence(s string) string {
|
||||
if len(s) == 0 {
|
||||
return ""
|
||||
}
|
||||
_, size := utf8.DecodeRune([]byte(s))
|
||||
return strings.ToUpper(s[:size]) + s[size:]
|
||||
}
|
||||
|
||||
// alts renders a []string as a glob alternatives list.
|
||||
func alts(s []string) string {
|
||||
switch len(s) {
|
||||
case 0:
|
||||
return ""
|
||||
case 1:
|
||||
return s[0]
|
||||
default:
|
||||
return fmt.Sprintf("{%s}", strings.Join(s, ","))
|
||||
}
|
||||
}
|
||||
|
||||
// join is strings.Join with the parameter order changed.
|
||||
func join(sep string, s []string) string {
|
||||
return strings.Join(s, sep)
|
||||
}
|
||||
|
||||
func main() {
|
||||
for _, pkg := range pkgs {
|
||||
t, err := template.Must(docs.Clone()).Parse(pkg.template)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse template: %v", err)
|
||||
}
|
||||
file := filepath.Join(pkg.path, "doc.go")
|
||||
f, err := os.Create(file)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to create %q: %v", file, err)
|
||||
}
|
||||
err = t.Execute(f, pkg)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to execute template: %v", err)
|
||||
}
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gendoc.go
|
||||
|
||||
package matrix
|
||||
|
|
@ -1,145 +0,0 @@
|
|||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file must be kept in sync with index_no_bound_checks.go.
|
||||
|
||||
//+build bounds
|
||||
|
||||
package mat64
|
||||
|
||||
import "github.com/gonum/matrix"
|
||||
|
||||
// At returns the element at row i, column j.
|
||||
func (m *Dense) At(i, j int) float64 {
|
||||
return m.at(i, j)
|
||||
}
|
||||
|
||||
func (m *Dense) at(i, j int) float64 {
|
||||
if uint(i) >= uint(m.mat.Rows) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(m.mat.Cols) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
return m.mat.Data[i*m.mat.Stride+j]
|
||||
}
|
||||
|
||||
// Set sets the element at row i, column j to the value v.
|
||||
func (m *Dense) Set(i, j int, v float64) {
|
||||
m.set(i, j, v)
|
||||
}
|
||||
|
||||
func (m *Dense) set(i, j int, v float64) {
|
||||
if uint(i) >= uint(m.mat.Rows) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(m.mat.Cols) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
m.mat.Data[i*m.mat.Stride+j] = v
|
||||
}
|
||||
|
||||
// At returns the element at row i.
|
||||
// It panics if i is out of bounds or if j is not zero.
|
||||
func (v *Vector) At(i, j int) float64 {
|
||||
if j != 0 {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
return v.at(i)
|
||||
}
|
||||
|
||||
func (v *Vector) at(i int) float64 {
|
||||
if uint(i) >= uint(v.n) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
return v.mat.Data[i*v.mat.Inc]
|
||||
}
|
||||
|
||||
// SetVec sets the element at row i to the value val.
|
||||
// It panics if i is out of bounds.
|
||||
func (v *Vector) SetVec(i int, val float64) {
|
||||
v.setVec(i, val)
|
||||
}
|
||||
|
||||
func (v *Vector) setVec(i int, val float64) {
|
||||
if uint(i) >= uint(v.n) {
|
||||
panic(matrix.ErrVectorAccess)
|
||||
}
|
||||
v.mat.Data[i*v.mat.Inc] = val
|
||||
}
|
||||
|
||||
// At returns the element at row i and column j.
|
||||
func (t *SymDense) At(i, j int) float64 {
|
||||
return t.at(i, j)
|
||||
}
|
||||
|
||||
func (t *SymDense) at(i, j int) float64 {
|
||||
if uint(i) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
if i > j {
|
||||
i, j = j, i
|
||||
}
|
||||
return t.mat.Data[i*t.mat.Stride+j]
|
||||
}
|
||||
|
||||
// SetSym sets the elements at (i,j) and (j,i) to the value v.
|
||||
func (t *SymDense) SetSym(i, j int, v float64) {
|
||||
t.set(i, j, v)
|
||||
}
|
||||
|
||||
func (t *SymDense) set(i, j int, v float64) {
|
||||
if uint(i) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
if i > j {
|
||||
i, j = j, i
|
||||
}
|
||||
t.mat.Data[i*t.mat.Stride+j] = v
|
||||
}
|
||||
|
||||
// At returns the element at row i, column j.
|
||||
func (t *TriDense) At(i, j int) float64 {
|
||||
return t.at(i, j)
|
||||
}
|
||||
|
||||
func (t *TriDense) at(i, j int) float64 {
|
||||
if uint(i) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
isUpper := t.isUpper()
|
||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
||||
return 0
|
||||
}
|
||||
return t.mat.Data[i*t.mat.Stride+j]
|
||||
}
|
||||
|
||||
// SetTri sets the element of the triangular matrix at row i, column j to the value v.
|
||||
// It panics if the location is outside the appropriate half of the matrix.
|
||||
func (t *TriDense) SetTri(i, j int, v float64) {
|
||||
t.set(i, j, v)
|
||||
}
|
||||
|
||||
func (t *TriDense) set(i, j int, v float64) {
|
||||
if uint(i) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
isUpper := t.isUpper()
|
||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
||||
panic(matrix.ErrTriangleSet)
|
||||
}
|
||||
t.mat.Data[i*t.mat.Stride+j] = v
|
||||
}
|
||||
|
|
@ -1,145 +0,0 @@
|
|||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file must be kept in sync with index_bound_checks.go.
|
||||
|
||||
//+build !bounds
|
||||
|
||||
package mat64
|
||||
|
||||
import "github.com/gonum/matrix"
|
||||
|
||||
// At returns the element at row i, column j.
|
||||
func (m *Dense) At(i, j int) float64 {
|
||||
if uint(i) >= uint(m.mat.Rows) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(m.mat.Cols) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
return m.at(i, j)
|
||||
}
|
||||
|
||||
func (m *Dense) at(i, j int) float64 {
|
||||
return m.mat.Data[i*m.mat.Stride+j]
|
||||
}
|
||||
|
||||
// Set sets the element at row i, column j to the value v.
|
||||
func (m *Dense) Set(i, j int, v float64) {
|
||||
if uint(i) >= uint(m.mat.Rows) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(m.mat.Cols) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
m.set(i, j, v)
|
||||
}
|
||||
|
||||
func (m *Dense) set(i, j int, v float64) {
|
||||
m.mat.Data[i*m.mat.Stride+j] = v
|
||||
}
|
||||
|
||||
// At returns the element at row i.
|
||||
// It panics if i is out of bounds or if j is not zero.
|
||||
func (v *Vector) At(i, j int) float64 {
|
||||
if uint(i) >= uint(v.n) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if j != 0 {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
return v.at(i)
|
||||
}
|
||||
|
||||
func (v *Vector) at(i int) float64 {
|
||||
return v.mat.Data[i*v.mat.Inc]
|
||||
}
|
||||
|
||||
// SetVec sets the element at row i to the value val.
|
||||
// It panics if i is out of bounds.
|
||||
func (v *Vector) SetVec(i int, val float64) {
|
||||
if uint(i) >= uint(v.n) {
|
||||
panic(matrix.ErrVectorAccess)
|
||||
}
|
||||
v.setVec(i, val)
|
||||
}
|
||||
|
||||
func (v *Vector) setVec(i int, val float64) {
|
||||
v.mat.Data[i*v.mat.Inc] = val
|
||||
}
|
||||
|
||||
// At returns the element at row i and column j.
|
||||
func (s *SymDense) At(i, j int) float64 {
|
||||
if uint(i) >= uint(s.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(s.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
return s.at(i, j)
|
||||
}
|
||||
|
||||
func (s *SymDense) at(i, j int) float64 {
|
||||
if i > j {
|
||||
i, j = j, i
|
||||
}
|
||||
return s.mat.Data[i*s.mat.Stride+j]
|
||||
}
|
||||
|
||||
// SetSym sets the elements at (i,j) and (j,i) to the value v.
|
||||
func (s *SymDense) SetSym(i, j int, v float64) {
|
||||
if uint(i) >= uint(s.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(s.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
s.set(i, j, v)
|
||||
}
|
||||
|
||||
func (s *SymDense) set(i, j int, v float64) {
|
||||
if i > j {
|
||||
i, j = j, i
|
||||
}
|
||||
s.mat.Data[i*s.mat.Stride+j] = v
|
||||
}
|
||||
|
||||
// At returns the element at row i, column j.
|
||||
func (t *TriDense) At(i, j int) float64 {
|
||||
if uint(i) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
return t.at(i, j)
|
||||
}
|
||||
|
||||
func (t *TriDense) at(i, j int) float64 {
|
||||
isUpper := t.triKind()
|
||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
||||
return 0
|
||||
}
|
||||
return t.mat.Data[i*t.mat.Stride+j]
|
||||
}
|
||||
|
||||
// SetTri sets the element at row i, column j to the value v.
|
||||
// It panics if the location is outside the appropriate half of the matrix.
|
||||
func (t *TriDense) SetTri(i, j int, v float64) {
|
||||
if uint(i) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrRowAccess)
|
||||
}
|
||||
if uint(j) >= uint(t.mat.N) {
|
||||
panic(matrix.ErrColAccess)
|
||||
}
|
||||
isUpper := t.isUpper()
|
||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
||||
panic(matrix.ErrTriangleSet)
|
||||
}
|
||||
t.set(i, j, v)
|
||||
}
|
||||
|
||||
func (t *TriDense) set(i, j int, v float64) {
|
||||
t.mat.Data[i*t.mat.Stride+j] = v
|
||||
}
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mat64
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"github.com/gonum/matrix"
|
||||
)
|
||||
|
||||
// Inner computes the generalized inner product
|
||||
// x^T A y
|
||||
// between vectors x and y with matrix A. This is only a true inner product if
|
||||
// A is symmetric positive definite, though the operation works for any matrix A.
|
||||
//
|
||||
// Inner panics if x.Len != m or y.Len != n when A is an m x n matrix.
|
||||
func Inner(x *Vector, A Matrix, y *Vector) float64 {
|
||||
m, n := A.Dims()
|
||||
if x.Len() != m {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
if y.Len() != n {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
if m == 0 || n == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var sum float64
|
||||
|
||||
switch b := A.(type) {
|
||||
case RawSymmetricer:
|
||||
bmat := b.RawSymmetric()
|
||||
if bmat.Uplo != blas.Upper {
|
||||
// Panic as a string not a mat64.Error.
|
||||
panic(badSymTriangle)
|
||||
}
|
||||
for i := 0; i < x.Len(); i++ {
|
||||
xi := x.at(i)
|
||||
if xi != 0 {
|
||||
if y.mat.Inc == 1 {
|
||||
sum += xi * f64.DotUnitary(
|
||||
bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
|
||||
y.mat.Data[i:],
|
||||
)
|
||||
} else {
|
||||
sum += xi * f64.DotInc(
|
||||
bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
|
||||
y.mat.Data[i*y.mat.Inc:], uintptr(n-i),
|
||||
1, uintptr(y.mat.Inc),
|
||||
0, 0,
|
||||
)
|
||||
}
|
||||
}
|
||||
yi := y.at(i)
|
||||
if i != n-1 && yi != 0 {
|
||||
if x.mat.Inc == 1 {
|
||||
sum += yi * f64.DotUnitary(
|
||||
bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
|
||||
x.mat.Data[i+1:],
|
||||
)
|
||||
} else {
|
||||
sum += yi * f64.DotInc(
|
||||
bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
|
||||
x.mat.Data[(i+1)*x.mat.Inc:], uintptr(n-i-1),
|
||||
1, uintptr(x.mat.Inc),
|
||||
0, 0,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
case RawMatrixer:
|
||||
bmat := b.RawMatrix()
|
||||
for i := 0; i < x.Len(); i++ {
|
||||
xi := x.at(i)
|
||||
if xi != 0 {
|
||||
if y.mat.Inc == 1 {
|
||||
sum += xi * f64.DotUnitary(
|
||||
bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
|
||||
y.mat.Data,
|
||||
)
|
||||
} else {
|
||||
sum += xi * f64.DotInc(
|
||||
bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
|
||||
y.mat.Data, uintptr(n),
|
||||
1, uintptr(y.mat.Inc),
|
||||
0, 0,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
for i := 0; i < x.Len(); i++ {
|
||||
xi := x.at(i)
|
||||
for j := 0; j < y.Len(); j++ {
|
||||
sum += xi * A.At(i, j) * y.at(j)
|
||||
}
|
||||
}
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
|
@ -1,217 +0,0 @@
|
|||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mat64
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/blas/blas64"
|
||||
"github.com/gonum/lapack/lapack64"
|
||||
"github.com/gonum/matrix"
|
||||
)
|
||||
|
||||
// LQ is a type for creating and using the LQ factorization of a matrix.
|
||||
type LQ struct {
|
||||
lq *Dense
|
||||
tau []float64
|
||||
cond float64
|
||||
}
|
||||
|
||||
func (lq *LQ) updateCond() {
|
||||
// A = LQ, where Q is orthonormal. Orthonormal multiplications do not change
|
||||
// the condition number. Thus, ||A|| = ||L|| ||Q|| = ||Q||.
|
||||
m := lq.lq.mat.Rows
|
||||
work := make([]float64, 3*m)
|
||||
iwork := make([]int, m)
|
||||
l := lq.lq.asTriDense(m, blas.NonUnit, blas.Lower)
|
||||
v := lapack64.Trcon(matrix.CondNorm, l.mat, work, iwork)
|
||||
lq.cond = 1 / v
|
||||
}
|
||||
|
||||
// Factorize computes the LQ factorization of an m×n matrix a where n <= m. The LQ
|
||||
// factorization always exists even if A is singular.
|
||||
//
|
||||
// The LQ decomposition is a factorization of the matrix A such that A = L * Q.
|
||||
// The matrix Q is an orthonormal n×n matrix, and L is an m×n upper triangular matrix.
|
||||
// L and Q can be extracted from the LFromLQ and QFromLQ methods on Dense.
|
||||
func (lq *LQ) Factorize(a Matrix) {
|
||||
m, n := a.Dims()
|
||||
if m > n {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
k := min(m, n)
|
||||
if lq.lq == nil {
|
||||
lq.lq = &Dense{}
|
||||
}
|
||||
lq.lq.Clone(a)
|
||||
work := make([]float64, 1)
|
||||
lq.tau = make([]float64, k)
|
||||
lapack64.Gelqf(lq.lq.mat, lq.tau, work, -1)
|
||||
work = make([]float64, int(work[0]))
|
||||
lapack64.Gelqf(lq.lq.mat, lq.tau, work, len(work))
|
||||
lq.updateCond()
|
||||
}
|
||||
|
||||
// TODO(btracey): Add in the "Reduced" forms for extracting the m×m orthogonal
|
||||
// and upper triangular matrices.
|
||||
|
||||
// LFromLQ extracts the m×n lower trapezoidal matrix from a LQ decomposition.
|
||||
func (m *Dense) LFromLQ(lq *LQ) {
|
||||
r, c := lq.lq.Dims()
|
||||
m.reuseAs(r, c)
|
||||
|
||||
// Disguise the LQ as a lower triangular
|
||||
t := &TriDense{
|
||||
mat: blas64.Triangular{
|
||||
N: r,
|
||||
Stride: lq.lq.mat.Stride,
|
||||
Data: lq.lq.mat.Data,
|
||||
Uplo: blas.Lower,
|
||||
Diag: blas.NonUnit,
|
||||
},
|
||||
cap: lq.lq.capCols,
|
||||
}
|
||||
m.Copy(t)
|
||||
|
||||
if r == c {
|
||||
return
|
||||
}
|
||||
// Zero right of the triangular.
|
||||
for i := 0; i < r; i++ {
|
||||
zero(m.mat.Data[i*m.mat.Stride+r : i*m.mat.Stride+c])
|
||||
}
|
||||
}
|
||||
|
||||
// QFromLQ extracts the n×n orthonormal matrix Q from an LQ decomposition.
|
||||
func (m *Dense) QFromLQ(lq *LQ) {
|
||||
r, c := lq.lq.Dims()
|
||||
m.reuseAs(c, c)
|
||||
|
||||
// Set Q = I.
|
||||
for i := 0; i < c; i++ {
|
||||
v := m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]
|
||||
zero(v)
|
||||
v[i] = 1
|
||||
}
|
||||
|
||||
// Construct Q from the elementary reflectors.
|
||||
h := blas64.General{
|
||||
Rows: c,
|
||||
Cols: c,
|
||||
Stride: c,
|
||||
Data: make([]float64, c*c),
|
||||
}
|
||||
qCopy := getWorkspace(c, c, false)
|
||||
v := blas64.Vector{
|
||||
Inc: 1,
|
||||
Data: make([]float64, c),
|
||||
}
|
||||
for i := 0; i < r; i++ {
|
||||
// Set h = I.
|
||||
zero(h.Data)
|
||||
for j := 0; j < len(h.Data); j += c + 1 {
|
||||
h.Data[j] = 1
|
||||
}
|
||||
|
||||
// Set the vector data as the elementary reflector.
|
||||
for j := 0; j < i; j++ {
|
||||
v.Data[j] = 0
|
||||
}
|
||||
v.Data[i] = 1
|
||||
for j := i + 1; j < c; j++ {
|
||||
v.Data[j] = lq.lq.mat.Data[i*lq.lq.mat.Stride+j]
|
||||
}
|
||||
|
||||
// Compute the multiplication matrix.
|
||||
blas64.Ger(-lq.tau[i], v, v, h)
|
||||
qCopy.Copy(m)
|
||||
blas64.Gemm(blas.NoTrans, blas.NoTrans,
|
||||
1, h, qCopy.mat,
|
||||
0, m.mat)
|
||||
}
|
||||
}
|
||||
|
||||
// SolveLQ finds a minimum-norm solution to a system of linear equations defined
|
||||
// by the matrices A and b, where A is an m×n matrix represented in its LQ factorized
|
||||
// form. If A is singular or near-singular a Condition error is returned. Please
|
||||
// see the documentation for Condition for more information.
|
||||
//
|
||||
// The minimization problem solved depends on the input parameters.
|
||||
// If trans == false, find the minimum norm solution of A * X = b.
|
||||
// If trans == true, find X such that ||A*X - b||_2 is minimized.
|
||||
// The solution matrix, X, is stored in place into the receiver.
|
||||
func (m *Dense) SolveLQ(lq *LQ, trans bool, b Matrix) error {
|
||||
r, c := lq.lq.Dims()
|
||||
br, bc := b.Dims()
|
||||
|
||||
// The LQ solve algorithm stores the result in-place into the right hand side.
|
||||
// The storage for the answer must be large enough to hold both b and x.
|
||||
// However, this method's receiver must be the size of x. Copy b, and then
|
||||
// copy the result into m at the end.
|
||||
if trans {
|
||||
if c != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
m.reuseAs(r, bc)
|
||||
} else {
|
||||
if r != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
m.reuseAs(c, bc)
|
||||
}
|
||||
// Do not need to worry about overlap between m and b because x has its own
|
||||
// independent storage.
|
||||
x := getWorkspace(max(r, c), bc, false)
|
||||
x.Copy(b)
|
||||
t := lq.lq.asTriDense(lq.lq.mat.Rows, blas.NonUnit, blas.Lower).mat
|
||||
if trans {
|
||||
work := make([]float64, 1)
|
||||
lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, -1)
|
||||
work = make([]float64, int(work[0]))
|
||||
lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, len(work))
|
||||
|
||||
ok := lapack64.Trtrs(blas.Trans, t, x.mat)
|
||||
if !ok {
|
||||
return matrix.Condition(math.Inf(1))
|
||||
}
|
||||
} else {
|
||||
ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
|
||||
if !ok {
|
||||
return matrix.Condition(math.Inf(1))
|
||||
}
|
||||
for i := r; i < c; i++ {
|
||||
zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
|
||||
}
|
||||
work := make([]float64, 1)
|
||||
lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, -1)
|
||||
work = make([]float64, int(work[0]))
|
||||
lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, len(work))
|
||||
}
|
||||
// M was set above to be the correct size for the result.
|
||||
m.Copy(x)
|
||||
putWorkspace(x)
|
||||
if lq.cond > matrix.ConditionTolerance {
|
||||
return matrix.Condition(lq.cond)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SolveLQVec finds a minimum-norm solution to a system of linear equations.
|
||||
// Please see Dense.SolveLQ for the full documentation.
|
||||
func (v *Vector) SolveLQVec(lq *LQ, trans bool, b *Vector) error {
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
r, c := lq.lq.Dims()
|
||||
// The Solve implementation is non-trivial, so rather than duplicate the code,
|
||||
// instead recast the Vectors as Dense and call the matrix code.
|
||||
if trans {
|
||||
v.reuseAs(r)
|
||||
} else {
|
||||
v.reuseAs(c)
|
||||
}
|
||||
return v.asDense().SolveLQ(lq, trans, b.asDense())
|
||||
}
|
||||
|
|
@ -1,183 +0,0 @@
|
|||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mat64
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/blas/blas64"
|
||||
"github.com/gonum/lapack/lapack64"
|
||||
"github.com/gonum/matrix"
|
||||
)
|
||||
|
||||
// QR is a type for creating and using the QR factorization of a matrix.
|
||||
type QR struct {
|
||||
qr *Dense
|
||||
tau []float64
|
||||
cond float64
|
||||
}
|
||||
|
||||
func (qr *QR) updateCond() {
|
||||
// A = QR, where Q is orthonormal. Orthonormal multiplications do not change
|
||||
// the condition number. Thus, ||A|| = ||Q|| ||R|| = ||R||.
|
||||
n := qr.qr.mat.Cols
|
||||
work := make([]float64, 3*n)
|
||||
iwork := make([]int, n)
|
||||
r := qr.qr.asTriDense(n, blas.NonUnit, blas.Upper)
|
||||
v := lapack64.Trcon(matrix.CondNorm, r.mat, work, iwork)
|
||||
qr.cond = 1 / v
|
||||
}
|
||||
|
||||
// Factorize computes the QR factorization of an m×n matrix a where m >= n. The QR
|
||||
// factorization always exists even if A is singular.
|
||||
//
|
||||
// The QR decomposition is a factorization of the matrix A such that A = Q * R.
|
||||
// The matrix Q is an orthonormal m×m matrix, and R is an m×n upper triangular matrix.
|
||||
// Q and R can be extracted from the QFromQR and RFromQR methods on Dense.
|
||||
func (qr *QR) Factorize(a Matrix) {
|
||||
m, n := a.Dims()
|
||||
if m < n {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
k := min(m, n)
|
||||
if qr.qr == nil {
|
||||
qr.qr = &Dense{}
|
||||
}
|
||||
qr.qr.Clone(a)
|
||||
work := make([]float64, 1)
|
||||
qr.tau = make([]float64, k)
|
||||
lapack64.Geqrf(qr.qr.mat, qr.tau, work, -1)
|
||||
|
||||
work = make([]float64, int(work[0]))
|
||||
lapack64.Geqrf(qr.qr.mat, qr.tau, work, len(work))
|
||||
qr.updateCond()
|
||||
}
|
||||
|
||||
// TODO(btracey): Add in the "Reduced" forms for extracting the n×n orthogonal
|
||||
// and upper triangular matrices.
|
||||
|
||||
// RFromQR extracts the m×n upper trapezoidal matrix from a QR decomposition.
|
||||
func (m *Dense) RFromQR(qr *QR) {
|
||||
r, c := qr.qr.Dims()
|
||||
m.reuseAs(r, c)
|
||||
|
||||
// Disguise the QR as an upper triangular
|
||||
t := &TriDense{
|
||||
mat: blas64.Triangular{
|
||||
N: c,
|
||||
Stride: qr.qr.mat.Stride,
|
||||
Data: qr.qr.mat.Data,
|
||||
Uplo: blas.Upper,
|
||||
Diag: blas.NonUnit,
|
||||
},
|
||||
cap: qr.qr.capCols,
|
||||
}
|
||||
m.Copy(t)
|
||||
|
||||
// Zero below the triangular.
|
||||
for i := r; i < c; i++ {
|
||||
zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c])
|
||||
}
|
||||
}
|
||||
|
||||
// QFromQR extracts the m×m orthonormal matrix Q from a QR decomposition.
|
||||
func (m *Dense) QFromQR(qr *QR) {
|
||||
r, _ := qr.qr.Dims()
|
||||
m.reuseAsZeroed(r, r)
|
||||
|
||||
// Set Q = I.
|
||||
for i := 0; i < r*r; i += r + 1 {
|
||||
m.mat.Data[i] = 1
|
||||
}
|
||||
|
||||
// Construct Q from the elementary reflectors.
|
||||
work := make([]float64, 1)
|
||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, -1)
|
||||
work = make([]float64, int(work[0]))
|
||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, len(work))
|
||||
}
|
||||
|
||||
// SolveQR finds a minimum-norm solution to a system of linear equations defined
|
||||
// by the matrices A and b, where A is an m×n matrix represented in its QR factorized
|
||||
// form. If A is singular or near-singular a Condition error is returned. Please
|
||||
// see the documentation for Condition for more information.
|
||||
//
|
||||
// The minimization problem solved depends on the input parameters.
|
||||
// If trans == false, find X such that ||A*X - b||_2 is minimized.
|
||||
// If trans == true, find the minimum norm solution of A^T * X = b.
|
||||
// The solution matrix, X, is stored in place into the receiver.
|
||||
func (m *Dense) SolveQR(qr *QR, trans bool, b Matrix) error {
|
||||
r, c := qr.qr.Dims()
|
||||
br, bc := b.Dims()
|
||||
|
||||
// The QR solve algorithm stores the result in-place into the right hand side.
|
||||
// The storage for the answer must be large enough to hold both b and x.
|
||||
// However, this method's receiver must be the size of x. Copy b, and then
|
||||
// copy the result into m at the end.
|
||||
if trans {
|
||||
if c != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
m.reuseAs(r, bc)
|
||||
} else {
|
||||
if r != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
m.reuseAs(c, bc)
|
||||
}
|
||||
// Do not need to worry about overlap between m and b because x has its own
|
||||
// independent storage.
|
||||
x := getWorkspace(max(r, c), bc, false)
|
||||
x.Copy(b)
|
||||
t := qr.qr.asTriDense(qr.qr.mat.Cols, blas.NonUnit, blas.Upper).mat
|
||||
if trans {
|
||||
ok := lapack64.Trtrs(blas.Trans, t, x.mat)
|
||||
if !ok {
|
||||
return matrix.Condition(math.Inf(1))
|
||||
}
|
||||
for i := c; i < r; i++ {
|
||||
zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
|
||||
}
|
||||
work := make([]float64, 1)
|
||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, -1)
|
||||
work = make([]float64, int(work[0]))
|
||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, len(work))
|
||||
} else {
|
||||
work := make([]float64, 1)
|
||||
lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, -1)
|
||||
work = make([]float64, int(work[0]))
|
||||
lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, len(work))
|
||||
|
||||
ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
|
||||
if !ok {
|
||||
return matrix.Condition(math.Inf(1))
|
||||
}
|
||||
}
|
||||
// M was set above to be the correct size for the result.
|
||||
m.Copy(x)
|
||||
putWorkspace(x)
|
||||
if qr.cond > matrix.ConditionTolerance {
|
||||
return matrix.Condition(qr.cond)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SolveQRVec finds a minimum-norm solution to a system of linear equations.
|
||||
// Please see Dense.SolveQR for the full documentation.
|
||||
func (v *Vector) SolveQRVec(qr *QR, trans bool, b *Vector) error {
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
r, c := qr.qr.Dims()
|
||||
// The Solve implementation is non-trivial, so rather than duplicate the code,
|
||||
// instead recast the Vectors as Dense and call the matrix code.
|
||||
if trans {
|
||||
v.reuseAs(r)
|
||||
} else {
|
||||
v.reuseAs(c)
|
||||
}
|
||||
return v.asDense().SolveQR(qr, trans, b.asDense())
|
||||
}
|
||||
|
|
@ -1,281 +0,0 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mat64
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/blas/blas64"
|
||||
)
|
||||
|
||||
const (
|
||||
// regionOverlap is the panic string used for the general case
|
||||
// of a matrix region overlap between a source and destination.
|
||||
regionOverlap = "mat64: bad region: overlap"
|
||||
|
||||
// regionIdentity is the panic string used for the specific
|
||||
// case of complete agreement between a source and a destination.
|
||||
regionIdentity = "mat64: bad region: identical"
|
||||
|
||||
// mismatchedStrides is the panic string used for overlapping
|
||||
// data slices with differing strides.
|
||||
mismatchedStrides = "mat64: bad region: different strides"
|
||||
)
|
||||
|
||||
// checkOverlap returns false if the receiver does not overlap data elements
|
||||
// referenced by the parameter and panics otherwise.
|
||||
//
|
||||
// checkOverlap methods return a boolean to allow the check call to be added to a
|
||||
// boolean expression, making use of short-circuit operators.
|
||||
|
||||
func (m *Dense) checkOverlap(a blas64.General) bool {
|
||||
mat := m.RawMatrix()
|
||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
off := offset(mat.Data[:1], a.Data[:1])
|
||||
|
||||
if off == 0 {
|
||||
// At least one element overlaps.
|
||||
if mat.Cols == a.Cols && mat.Rows == a.Rows && mat.Stride == a.Stride {
|
||||
panic(regionIdentity)
|
||||
}
|
||||
panic(regionOverlap)
|
||||
}
|
||||
|
||||
if off > 0 && len(mat.Data) <= off {
|
||||
// We know m is completely before a.
|
||||
return false
|
||||
}
|
||||
if off < 0 && len(a.Data) <= -off {
|
||||
// We know m is completely after a.
|
||||
return false
|
||||
}
|
||||
|
||||
if mat.Stride != a.Stride {
|
||||
// Too hard, so assume the worst.
|
||||
panic(mismatchedStrides)
|
||||
}
|
||||
|
||||
if off < 0 {
|
||||
off = -off
|
||||
mat.Cols, a.Cols = a.Cols, mat.Cols
|
||||
}
|
||||
if rectanglesOverlap(off, mat.Cols, a.Cols, mat.Stride) {
|
||||
panic(regionOverlap)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (s *SymDense) checkOverlap(a blas64.Symmetric) bool {
|
||||
mat := s.RawSymmetric()
|
||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
off := offset(mat.Data[:1], a.Data[:1])
|
||||
|
||||
if off == 0 {
|
||||
// At least one element overlaps.
|
||||
if mat.N == a.N && mat.Stride == a.Stride {
|
||||
panic(regionIdentity)
|
||||
}
|
||||
panic(regionOverlap)
|
||||
}
|
||||
|
||||
if off > 0 && len(mat.Data) <= off {
|
||||
// We know s is completely before a.
|
||||
return false
|
||||
}
|
||||
if off < 0 && len(a.Data) <= -off {
|
||||
// We know s is completely after a.
|
||||
return false
|
||||
}
|
||||
|
||||
if mat.Stride != a.Stride {
|
||||
// Too hard, so assume the worst.
|
||||
panic(mismatchedStrides)
|
||||
}
|
||||
|
||||
if off < 0 {
|
||||
off = -off
|
||||
mat.N, a.N = a.N, mat.N
|
||||
// If we created the matrix it will always
|
||||
// be in the upper triangle, but don't trust
|
||||
// that this is the case.
|
||||
mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
|
||||
}
|
||||
if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
|
||||
panic(regionOverlap)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (t *TriDense) checkOverlap(a blas64.Triangular) bool {
|
||||
mat := t.RawTriangular()
|
||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
off := offset(mat.Data[:1], a.Data[:1])
|
||||
|
||||
if off == 0 {
|
||||
// At least one element overlaps.
|
||||
if mat.N == a.N && mat.Stride == a.Stride {
|
||||
panic(regionIdentity)
|
||||
}
|
||||
panic(regionOverlap)
|
||||
}
|
||||
|
||||
if off > 0 && len(mat.Data) <= off {
|
||||
// We know t is completely before a.
|
||||
return false
|
||||
}
|
||||
if off < 0 && len(a.Data) <= -off {
|
||||
// We know t is completely after a.
|
||||
return false
|
||||
}
|
||||
|
||||
if mat.Stride != a.Stride {
|
||||
// Too hard, so assume the worst.
|
||||
panic(mismatchedStrides)
|
||||
}
|
||||
|
||||
if off < 0 {
|
||||
off = -off
|
||||
mat.N, a.N = a.N, mat.N
|
||||
mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
|
||||
}
|
||||
if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
|
||||
panic(regionOverlap)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (v *Vector) checkOverlap(a blas64.Vector) bool {
|
||||
mat := v.mat
|
||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
off := offset(mat.Data[:1], a.Data[:1])
|
||||
|
||||
if off == 0 {
|
||||
// At least one element overlaps.
|
||||
if mat.Inc == a.Inc && len(mat.Data) == len(a.Data) {
|
||||
panic(regionIdentity)
|
||||
}
|
||||
panic(regionOverlap)
|
||||
}
|
||||
|
||||
if off > 0 && len(mat.Data) <= off {
|
||||
// We know v is completely before a.
|
||||
return false
|
||||
}
|
||||
if off < 0 && len(a.Data) <= -off {
|
||||
// We know v is completely after a.
|
||||
return false
|
||||
}
|
||||
|
||||
if mat.Inc != a.Inc {
|
||||
// Too hard, so assume the worst.
|
||||
panic(mismatchedStrides)
|
||||
}
|
||||
|
||||
if mat.Inc == 1 || off&mat.Inc == 0 {
|
||||
panic(regionOverlap)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// rectanglesOverlap returns whether the strided rectangles a and b overlap
|
||||
// when b is offset by off elements after a but has at least one element before
|
||||
// the end of a. off must be positive. a and b have aCols and bCols respectively.
|
||||
//
|
||||
// rectanglesOverlap works by shifting both matrices left such that the left
|
||||
// column of a is at 0. The column indexes are flattened by obtaining the shifted
|
||||
// relative left and right column positions modulo the common stride. This allows
|
||||
// direct comparison of the column offsets when the matrix backing data slices
|
||||
// are known to overlap.
|
||||
func rectanglesOverlap(off, aCols, bCols, stride int) bool {
|
||||
if stride == 1 {
|
||||
// Unit stride means overlapping data
|
||||
// slices must overlap as matrices.
|
||||
return true
|
||||
}
|
||||
|
||||
// Flatten the shifted matrix column positions
|
||||
// so a starts at 0, modulo the common stride.
|
||||
const aFrom = 0
|
||||
aTo := aCols
|
||||
// The mod stride operations here make the from
|
||||
// and to indexes comparable between a and b when
|
||||
// the data slices of a and b overlap.
|
||||
bFrom := off % stride
|
||||
bTo := (bFrom + bCols) % stride
|
||||
|
||||
if bTo == 0 || bFrom < bTo {
|
||||
// b matrix is not wrapped: compare for
|
||||
// simple overlap.
|
||||
return bFrom < aTo
|
||||
}
|
||||
|
||||
// b strictly wraps and so must overlap with a.
|
||||
return true
|
||||
}
|
||||
|
||||
// trianglesOverlap returns whether the strided triangles a and b overlap
|
||||
// when b is offset by off elements after a but has at least one element before
|
||||
// the end of a. off must be positive. a and b are aSize×aSize and bSize×bSize
|
||||
// respectively.
|
||||
func trianglesOverlap(off, aSize, bSize, stride int, aUpper, bUpper bool) bool {
|
||||
if !rectanglesOverlap(off, aSize, bSize, stride) {
|
||||
// Fast return if bounding rectangles do not overlap.
|
||||
return false
|
||||
}
|
||||
|
||||
// Find location of b relative to a.
|
||||
rowOffset := off / stride
|
||||
colOffset := off % stride
|
||||
if (off+bSize)%stride < colOffset {
|
||||
// We have wrapped, so readjust offsets.
|
||||
rowOffset++
|
||||
colOffset -= stride
|
||||
}
|
||||
|
||||
if aUpper {
|
||||
// Check whether the upper left of b
|
||||
// is in the triangle of a
|
||||
if rowOffset >= 0 && rowOffset <= colOffset {
|
||||
return true
|
||||
}
|
||||
// Check whether the upper right of b
|
||||
// is in the triangle of a.
|
||||
return bUpper && rowOffset < colOffset+bSize
|
||||
}
|
||||
|
||||
// Check whether the upper left of b
|
||||
// is in the triangle of a
|
||||
if colOffset >= 0 && rowOffset >= colOffset {
|
||||
return true
|
||||
}
|
||||
if bUpper {
|
||||
// Check whether the upper right corner of b
|
||||
// is in a or the upper row of b spans a row
|
||||
// of a.
|
||||
return rowOffset > colOffset+bSize || colOffset < 0
|
||||
}
|
||||
if colOffset < 0 {
|
||||
// Check whether the lower left of a
|
||||
// is in the triangle of b or below
|
||||
// the diagonal of a. This requires a
|
||||
// swap of reference origin.
|
||||
return -rowOffset+aSize > -colOffset
|
||||
}
|
||||
// Check whether the lower left of b
|
||||
// is in the triangle of a or below
|
||||
// the diagonal of a.
|
||||
return rowOffset+bSize > colOffset
|
||||
}
|
||||
|
|
@ -1,468 +0,0 @@
|
|||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mat64
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/blas/blas64"
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"github.com/gonum/matrix"
|
||||
)
|
||||
|
||||
var (
|
||||
vector *Vector
|
||||
|
||||
_ Matrix = vector
|
||||
|
||||
_ Reseter = vector
|
||||
)
|
||||
|
||||
// Vector represents a column vector.
|
||||
type Vector struct {
|
||||
mat blas64.Vector
|
||||
n int
|
||||
// A BLAS vector can have a negative increment, but allowing this
|
||||
// in the mat64 type complicates a lot of code, and doesn't gain anything.
|
||||
// Vector must have positive increment in this package.
|
||||
}
|
||||
|
||||
// NewVector creates a new Vector of length n. If data == nil,
|
||||
// a new slice is allocated for the backing slice. If len(data) == n, data is
|
||||
// used as the backing slice, and changes to the elements of the returned Vector
|
||||
// will be reflected in data. If neither of these is true, NewVector will panic.
|
||||
func NewVector(n int, data []float64) *Vector {
|
||||
if len(data) != n && data != nil {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
if data == nil {
|
||||
data = make([]float64, n)
|
||||
}
|
||||
return &Vector{
|
||||
mat: blas64.Vector{
|
||||
Inc: 1,
|
||||
Data: data,
|
||||
},
|
||||
n: n,
|
||||
}
|
||||
}
|
||||
|
||||
// ViewVec returns a sub-vector view of the receiver starting at element i and
|
||||
// extending n rows. If i is out of range, n is zero, or the view extends
|
||||
// beyond the bounds of the Vector, ViewVec will panic with ErrIndexOutOfRange.
|
||||
// The returned Vector retains reference to the underlying vector.
|
||||
//
|
||||
// ViewVec is deprecated and should not be used. It will be removed at a later date.
|
||||
func (v *Vector) ViewVec(i, n int) *Vector {
|
||||
return v.SliceVec(i, i+n)
|
||||
}
|
||||
|
||||
// SliceVec returns a new Vector that shares backing data with the receiver.
|
||||
// The returned matrix starts at i of the recevier and extends k-i elements.
|
||||
// SliceVec panics with ErrIndexOutOfRange if the slice is outside the bounds
|
||||
// of the receiver.
|
||||
func (v *Vector) SliceVec(i, k int) *Vector {
|
||||
if i < 0 || k <= i || v.n < k {
|
||||
panic(matrix.ErrIndexOutOfRange)
|
||||
}
|
||||
return &Vector{
|
||||
n: k - i,
|
||||
mat: blas64.Vector{
|
||||
Inc: v.mat.Inc,
|
||||
Data: v.mat.Data[i*v.mat.Inc : (k-1)*v.mat.Inc+1],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (v *Vector) Dims() (r, c int) {
|
||||
if v.isZero() {
|
||||
return 0, 0
|
||||
}
|
||||
return v.n, 1
|
||||
}
|
||||
|
||||
// Len returns the length of the vector.
|
||||
func (v *Vector) Len() int {
|
||||
return v.n
|
||||
}
|
||||
|
||||
// T performs an implicit transpose by returning the receiver inside a Transpose.
|
||||
func (v *Vector) T() Matrix {
|
||||
return Transpose{v}
|
||||
}
|
||||
|
||||
// Reset zeros the length of the vector so that it can be reused as the
|
||||
// receiver of a dimensionally restricted operation.
|
||||
//
|
||||
// See the Reseter interface for more information.
|
||||
func (v *Vector) Reset() {
|
||||
// No change of Inc or n to 0 may be
|
||||
// made unless both are set to 0.
|
||||
v.mat.Inc = 0
|
||||
v.n = 0
|
||||
v.mat.Data = v.mat.Data[:0]
|
||||
}
|
||||
|
||||
// CloneVec makes a copy of a into the receiver, overwriting the previous value
|
||||
// of the receiver.
|
||||
func (v *Vector) CloneVec(a *Vector) {
|
||||
if v == a {
|
||||
return
|
||||
}
|
||||
v.n = a.n
|
||||
v.mat = blas64.Vector{
|
||||
Inc: 1,
|
||||
Data: use(v.mat.Data, v.n),
|
||||
}
|
||||
blas64.Copy(v.n, a.mat, v.mat)
|
||||
}
|
||||
|
||||
func (v *Vector) RawVector() blas64.Vector {
|
||||
return v.mat
|
||||
}
|
||||
|
||||
// CopyVec makes a copy of elements of a into the receiver. It is similar to the
|
||||
// built-in copy; it copies as much as the overlap between the two vectors and
|
||||
// returns the number of elements it copied.
|
||||
func (v *Vector) CopyVec(a *Vector) int {
|
||||
n := min(v.Len(), a.Len())
|
||||
if v != a {
|
||||
blas64.Copy(n, a.mat, v.mat)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// ScaleVec scales the vector a by alpha, placing the result in the receiver.
|
||||
func (v *Vector) ScaleVec(alpha float64, a *Vector) {
|
||||
n := a.Len()
|
||||
if v != a {
|
||||
v.reuseAs(n)
|
||||
if v.mat.Inc == 1 && a.mat.Inc == 1 {
|
||||
f64.ScalUnitaryTo(v.mat.Data, alpha, a.mat.Data)
|
||||
return
|
||||
}
|
||||
f64.ScalIncTo(v.mat.Data, uintptr(v.mat.Inc),
|
||||
alpha, a.mat.Data, uintptr(n), uintptr(a.mat.Inc))
|
||||
return
|
||||
}
|
||||
if v.mat.Inc == 1 {
|
||||
f64.ScalUnitary(alpha, v.mat.Data)
|
||||
return
|
||||
}
|
||||
f64.ScalInc(alpha, v.mat.Data, uintptr(n), uintptr(v.mat.Inc))
|
||||
}
|
||||
|
||||
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver.
|
||||
func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) {
|
||||
if alpha == 1 {
|
||||
v.AddVec(a, b)
|
||||
return
|
||||
}
|
||||
if alpha == -1 {
|
||||
v.SubVec(a, b)
|
||||
return
|
||||
}
|
||||
|
||||
ar := a.Len()
|
||||
br := b.Len()
|
||||
|
||||
if ar != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
|
||||
if v != a {
|
||||
v.checkOverlap(a.mat)
|
||||
}
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
|
||||
v.reuseAs(ar)
|
||||
|
||||
switch {
|
||||
case alpha == 0: // v <- a
|
||||
v.CopyVec(a)
|
||||
case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v
|
||||
blas64.Scal(ar, alpha+1, v.mat)
|
||||
case v == a && v != b: // v <- v + alpha * b
|
||||
if v.mat.Inc == 1 && b.mat.Inc == 1 {
|
||||
// Fast path for a common case.
|
||||
f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
|
||||
} else {
|
||||
f64.AxpyInc(alpha, b.mat.Data, v.mat.Data,
|
||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(v.mat.Inc), 0, 0)
|
||||
}
|
||||
default: // v <- a + alpha * b or v <- a + alpha * v
|
||||
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
|
||||
// Fast path for a common case.
|
||||
f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
|
||||
} else {
|
||||
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
|
||||
alpha, b.mat.Data, a.mat.Data,
|
||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AddVec adds the vectors a and b, placing the result in the receiver.
|
||||
func (v *Vector) AddVec(a, b *Vector) {
|
||||
ar := a.Len()
|
||||
br := b.Len()
|
||||
|
||||
if ar != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
|
||||
if v != a {
|
||||
v.checkOverlap(a.mat)
|
||||
}
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
|
||||
v.reuseAs(ar)
|
||||
|
||||
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
|
||||
// Fast path for a common case.
|
||||
f64.AxpyUnitaryTo(v.mat.Data, 1, b.mat.Data, a.mat.Data)
|
||||
return
|
||||
}
|
||||
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
|
||||
1, b.mat.Data, a.mat.Data,
|
||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
|
||||
}
|
||||
|
||||
// SubVec subtracts the vector b from a, placing the result in the receiver.
|
||||
func (v *Vector) SubVec(a, b *Vector) {
|
||||
ar := a.Len()
|
||||
br := b.Len()
|
||||
|
||||
if ar != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
|
||||
if v != a {
|
||||
v.checkOverlap(a.mat)
|
||||
}
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
|
||||
v.reuseAs(ar)
|
||||
|
||||
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
|
||||
// Fast path for a common case.
|
||||
f64.AxpyUnitaryTo(v.mat.Data, -1, b.mat.Data, a.mat.Data)
|
||||
return
|
||||
}
|
||||
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
|
||||
-1, b.mat.Data, a.mat.Data,
|
||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
|
||||
}
|
||||
|
||||
// MulElemVec performs element-wise multiplication of a and b, placing the result
|
||||
// in the receiver.
|
||||
func (v *Vector) MulElemVec(a, b *Vector) {
|
||||
ar := a.Len()
|
||||
br := b.Len()
|
||||
|
||||
if ar != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
|
||||
if v != a {
|
||||
v.checkOverlap(a.mat)
|
||||
}
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
|
||||
v.reuseAs(ar)
|
||||
|
||||
amat, bmat := a.RawVector(), b.RawVector()
|
||||
for i := 0; i < v.n; i++ {
|
||||
v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] * bmat.Data[i*bmat.Inc]
|
||||
}
|
||||
}
|
||||
|
||||
// DivElemVec performs element-wise division of a by b, placing the result
|
||||
// in the receiver.
|
||||
func (v *Vector) DivElemVec(a, b *Vector) {
|
||||
ar := a.Len()
|
||||
br := b.Len()
|
||||
|
||||
if ar != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
|
||||
if v != a {
|
||||
v.checkOverlap(a.mat)
|
||||
}
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
|
||||
v.reuseAs(ar)
|
||||
|
||||
amat, bmat := a.RawVector(), b.RawVector()
|
||||
for i := 0; i < v.n; i++ {
|
||||
v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] / bmat.Data[i*bmat.Inc]
|
||||
}
|
||||
}
|
||||
|
||||
// MulVec computes a * b. The result is stored into the receiver.
|
||||
// MulVec panics if the number of columns in a does not equal the number of rows in b.
|
||||
func (v *Vector) MulVec(a Matrix, b *Vector) {
|
||||
r, c := a.Dims()
|
||||
br := b.Len()
|
||||
if c != br {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
|
||||
if v != b {
|
||||
v.checkOverlap(b.mat)
|
||||
}
|
||||
|
||||
a, trans := untranspose(a)
|
||||
ar, ac := a.Dims()
|
||||
v.reuseAs(r)
|
||||
var restore func()
|
||||
if v == a {
|
||||
v, restore = v.isolatedWorkspace(a.(*Vector))
|
||||
defer restore()
|
||||
} else if v == b {
|
||||
v, restore = v.isolatedWorkspace(b)
|
||||
defer restore()
|
||||
}
|
||||
|
||||
switch a := a.(type) {
|
||||
case *Vector:
|
||||
if v != a {
|
||||
v.checkOverlap(a.mat)
|
||||
}
|
||||
|
||||
if a.Len() == 1 {
|
||||
// {1,1} x {1,n}
|
||||
av := a.At(0, 0)
|
||||
for i := 0; i < b.Len(); i++ {
|
||||
v.mat.Data[i*v.mat.Inc] = av * b.mat.Data[i*b.mat.Inc]
|
||||
}
|
||||
return
|
||||
}
|
||||
if b.Len() == 1 {
|
||||
// {1,n} x {1,1}
|
||||
bv := b.At(0, 0)
|
||||
for i := 0; i < a.Len(); i++ {
|
||||
v.mat.Data[i*v.mat.Inc] = bv * a.mat.Data[i*a.mat.Inc]
|
||||
}
|
||||
return
|
||||
}
|
||||
// {n,1} x {1,n}
|
||||
var sum float64
|
||||
for i := 0; i < c; i++ {
|
||||
sum += a.At(i, 0) * b.At(i, 0)
|
||||
}
|
||||
v.SetVec(0, sum)
|
||||
return
|
||||
case RawSymmetricer:
|
||||
amat := a.RawSymmetric()
|
||||
blas64.Symv(1, amat, b.mat, 0, v.mat)
|
||||
case RawTriangular:
|
||||
v.CopyVec(b)
|
||||
amat := a.RawTriangular()
|
||||
ta := blas.NoTrans
|
||||
if trans {
|
||||
ta = blas.Trans
|
||||
}
|
||||
blas64.Trmv(ta, amat, v.mat)
|
||||
case RawMatrixer:
|
||||
amat := a.RawMatrix()
|
||||
// We don't know that a is a *Dense, so make
|
||||
// a temporary Dense to check overlap.
|
||||
(&Dense{mat: amat}).checkOverlap(v.asGeneral())
|
||||
t := blas.NoTrans
|
||||
if trans {
|
||||
t = blas.Trans
|
||||
}
|
||||
blas64.Gemv(t, 1, amat, b.mat, 0, v.mat)
|
||||
default:
|
||||
if trans {
|
||||
col := make([]float64, ar)
|
||||
for c := 0; c < ac; c++ {
|
||||
for i := range col {
|
||||
col[i] = a.At(i, c)
|
||||
}
|
||||
var f float64
|
||||
for i, e := range col {
|
||||
f += e * b.mat.Data[i*b.mat.Inc]
|
||||
}
|
||||
v.mat.Data[c*v.mat.Inc] = f
|
||||
}
|
||||
} else {
|
||||
row := make([]float64, ac)
|
||||
for r := 0; r < ar; r++ {
|
||||
for i := range row {
|
||||
row[i] = a.At(r, i)
|
||||
}
|
||||
var f float64
|
||||
for i, e := range row {
|
||||
f += e * b.mat.Data[i*b.mat.Inc]
|
||||
}
|
||||
v.mat.Data[r*v.mat.Inc] = f
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reuseAs resizes an empty vector to a r×1 vector,
|
||||
// or checks that a non-empty matrix is r×1.
|
||||
func (v *Vector) reuseAs(r int) {
|
||||
if v.isZero() {
|
||||
v.mat = blas64.Vector{
|
||||
Inc: 1,
|
||||
Data: use(v.mat.Data, r),
|
||||
}
|
||||
v.n = r
|
||||
return
|
||||
}
|
||||
if r != v.n {
|
||||
panic(matrix.ErrShape)
|
||||
}
|
||||
}
|
||||
|
||||
func (v *Vector) isZero() bool {
|
||||
// It must be the case that v.Dims() returns
|
||||
// zeros in this case. See comment in Reset().
|
||||
return v.mat.Inc == 0
|
||||
}
|
||||
|
||||
func (v *Vector) isolatedWorkspace(a *Vector) (n *Vector, restore func()) {
|
||||
l := a.Len()
|
||||
n = getWorkspaceVec(l, false)
|
||||
return n, func() {
|
||||
v.CopyVec(n)
|
||||
putWorkspaceVec(n)
|
||||
}
|
||||
}
|
||||
|
||||
// asDense returns a Dense representation of the receiver with the same
|
||||
// underlying data.
|
||||
func (v *Vector) asDense() *Dense {
|
||||
return &Dense{
|
||||
mat: v.asGeneral(),
|
||||
capRows: v.n,
|
||||
capCols: 1,
|
||||
}
|
||||
}
|
||||
|
||||
// asGeneral returns a blas64.General representation of the receiver with the
|
||||
// same underlying data.
|
||||
func (v *Vector) asGeneral() blas64.General {
|
||||
return blas64.General{
|
||||
Rows: v.n,
|
||||
Cols: 1,
|
||||
Stride: v.mat.Inc,
|
||||
Data: v.mat.Data,
|
||||
}
|
||||
}
|
||||
|
|
@ -1,118 +0,0 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package plotter
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"image/color"
|
||||
"math"
|
||||
|
||||
"github.com/gonum/plot"
|
||||
"github.com/gonum/plot/vg"
|
||||
"github.com/gonum/plot/vg/draw"
|
||||
)
|
||||
|
||||
// Bubbles implements the Plotter interface, drawing
|
||||
// a bubble plot of x, y, z triples where the z value
|
||||
// determines the radius of the bubble.
|
||||
type Bubbles struct {
|
||||
XYZs
|
||||
|
||||
// Color is the color of the bubbles.
|
||||
color.Color
|
||||
|
||||
// MinRadius and MaxRadius give the minimum
|
||||
// and maximum bubble radius respectively.
|
||||
// The radii of each bubble is interpolated linearly
|
||||
// between these two values.
|
||||
MinRadius, MaxRadius vg.Length
|
||||
|
||||
// MinZ and MaxZ are the minimum and
|
||||
// maximum Z values from the data.
|
||||
MinZ, MaxZ float64
|
||||
}
|
||||
|
||||
// NewBubbles creates as new bubble plot plotter for
|
||||
// the given data, with a minimum and maximum
|
||||
// bubble radius.
|
||||
func NewBubbles(xyz XYZer, min, max vg.Length) (*Bubbles, error) {
|
||||
cpy, err := CopyXYZs(xyz)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if min > max {
|
||||
return nil, errors.New("Min bubble radius is greater than the max radius")
|
||||
}
|
||||
minz := cpy[0].Z
|
||||
maxz := cpy[0].Z
|
||||
for _, d := range cpy {
|
||||
minz = math.Min(minz, d.Z)
|
||||
maxz = math.Max(maxz, d.Z)
|
||||
}
|
||||
return &Bubbles{
|
||||
XYZs: cpy,
|
||||
MinRadius: min,
|
||||
MaxRadius: max,
|
||||
MinZ: minz,
|
||||
MaxZ: maxz,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Plot implements the Plot method of the plot.Plotter interface.
|
||||
func (bs *Bubbles) Plot(c draw.Canvas, plt *plot.Plot) {
|
||||
trX, trY := plt.Transforms(&c)
|
||||
|
||||
c.SetColor(bs.Color)
|
||||
|
||||
for _, d := range bs.XYZs {
|
||||
x := trX(d.X)
|
||||
y := trY(d.Y)
|
||||
pt := vg.Point{X: x, Y: y}
|
||||
if !c.Contains(pt) {
|
||||
continue
|
||||
}
|
||||
|
||||
rad := bs.radius(d.Z)
|
||||
|
||||
// draw a circle centered at x, y
|
||||
var p vg.Path
|
||||
p.Move(vg.Point{X: x + rad, Y: y})
|
||||
p.Arc(pt, rad, 0, 2*math.Pi)
|
||||
p.Close()
|
||||
c.Fill(p)
|
||||
}
|
||||
}
|
||||
|
||||
// radius returns the radius of a bubble by linear interpolation.
|
||||
func (bs *Bubbles) radius(z float64) vg.Length {
|
||||
rng := bs.MaxRadius - bs.MinRadius
|
||||
if bs.MaxZ == bs.MinZ {
|
||||
return rng/2 + bs.MinRadius
|
||||
}
|
||||
d := (z - bs.MinZ) / (bs.MaxZ - bs.MinZ)
|
||||
return vg.Length(d)*rng + bs.MinRadius
|
||||
}
|
||||
|
||||
// DataRange implements the DataRange method
|
||||
// of the plot.DataRanger interface.
|
||||
func (bs *Bubbles) DataRange() (xmin, xmax, ymin, ymax float64) {
|
||||
return XYRange(XYValues{bs.XYZs})
|
||||
}
|
||||
|
||||
// GlyphBoxes implements the GlyphBoxes method
|
||||
// of the plot.GlyphBoxer interface.
|
||||
func (bs *Bubbles) GlyphBoxes(plt *plot.Plot) []plot.GlyphBox {
|
||||
boxes := make([]plot.GlyphBox, len(bs.XYZs))
|
||||
for i, d := range bs.XYZs {
|
||||
boxes[i].X = plt.X.Norm(d.X)
|
||||
boxes[i].Y = plt.Y.Norm(d.Y)
|
||||
r := bs.radius(d.Z)
|
||||
boxes[i].Rectangle = vg.Rectangle{
|
||||
Min: vg.Point{X: -r, Y: -r},
|
||||
Max: vg.Point{X: +r, Y: +r},
|
||||
}
|
||||
}
|
||||
return boxes
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
Copyright ©2013 The Gonum Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the gonum project nor the names of its authors and
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
105
vendor/github.com/gonum/blas/blas.go → vendor/gonum.org/v1/gonum/blas/blas.go
generated
vendored
105
vendor/github.com/gonum/blas/blas.go → vendor/gonum.org/v1/gonum/blas/blas.go
generated
vendored
|
|
@ -1,110 +1,9 @@
|
|||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2013 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package blas provides interfaces for the BLAS linear algebra standard.
|
||||
//go:generate ./conversions.bash
|
||||
|
||||
All methods must perform appropriate parameter checking and panic if
|
||||
provided parameters that do not conform to the requirements specified
|
||||
by the BLAS standard.
|
||||
|
||||
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
|
||||
|
||||
This version is modified to remove the "order" option. All matrix operations are
|
||||
on row-order matrices.
|
||||
|
||||
Level 1 BLAS
|
||||
|
||||
dim scalar vector vector scalars 5-element prefixes
|
||||
struct
|
||||
|
||||
_rotg ( a, b ) S, D
|
||||
_rotmg( d1, d2, a, b ) S, D
|
||||
_rot ( n, x, incX, y, incY, c, s ) S, D
|
||||
_rotm ( n, x, incX, y, incY, param ) S, D
|
||||
_swap ( n, x, incX, y, incY ) S, D, C, Z
|
||||
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
|
||||
_copy ( n, x, incX, y, incY ) S, D, C, Z
|
||||
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
|
||||
_dot ( n, x, incX, y, incY ) S, D, Ds
|
||||
_dotu ( n, x, incX, y, incY ) C, Z
|
||||
_dotc ( n, x, incX, y, incY ) C, Z
|
||||
__dot ( n, alpha, x, incX, y, incY ) Sds
|
||||
_nrm2 ( n, x, incX ) S, D, Sc, Dz
|
||||
_asum ( n, x, incX ) S, D, Sc, Dz
|
||||
I_amax( n, x, incX ) s, d, c, z
|
||||
|
||||
Level 2 BLAS
|
||||
|
||||
options dim b-width scalar matrix vector scalar vector prefixes
|
||||
|
||||
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
||||
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
||||
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
||||
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
||||
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
|
||||
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
||||
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
||||
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
|
||||
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
||||
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
||||
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
||||
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
||||
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
||||
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
||||
|
||||
options dim scalar vector vector matrix prefixes
|
||||
|
||||
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
|
||||
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
|
||||
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
|
||||
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
|
||||
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
|
||||
_spr ( uplo, n, alpha, x, incX, ap ) S, D
|
||||
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
|
||||
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
|
||||
|
||||
Level 3 BLAS
|
||||
|
||||
options dim scalar matrix matrix scalar matrix prefixes
|
||||
|
||||
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
||||
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
|
||||
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
|
||||
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
||||
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
||||
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
||||
|
||||
Meaning of prefixes
|
||||
|
||||
S - float32 C - complex64
|
||||
D - float64 Z - complex128
|
||||
|
||||
Matrix types
|
||||
|
||||
GE - GEneral GB - General Band
|
||||
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
|
||||
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
|
||||
TR - TRiangular TB - Triangular Band TP - Triangular Packed
|
||||
|
||||
Options
|
||||
|
||||
trans = NoTrans, Trans, ConjTrans
|
||||
uplo = Upper, Lower
|
||||
diag = Nonunit, Unit
|
||||
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
|
||||
|
||||
For real matrices, Trans and ConjTrans have the same meaning.
|
||||
For Hermitian matrices, trans = Trans is not allowed.
|
||||
For complex symmetric matrices, trans = ConjTrans is not allowed.
|
||||
*/
|
||||
package blas
|
||||
|
||||
// Flag constants indicate Givens transformation H matrix state.
|
||||
|
|
@ -1,16 +1,15 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package blas64 provides a simple interface to the float64 BLAS API.
|
||||
package blas64
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/blas/native"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/blas/gonum"
|
||||
)
|
||||
|
||||
var blas64 blas.Float64 = native.Implementation{}
|
||||
var blas64 blas.Float64 = gonum.Implementation{}
|
||||
|
||||
// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls.
|
||||
// The default implementation is native.Implementation.
|
||||
|
|
@ -0,0 +1,277 @@
|
|||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package blas64
|
||||
|
||||
import "gonum.org/v1/gonum/blas"
|
||||
|
||||
// GeneralCols represents a matrix using the conventional column-major storage scheme.
|
||||
type GeneralCols General
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions as a and have adequate backing
|
||||
// data storage.
|
||||
func (t GeneralCols) From(a General) {
|
||||
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if len(t.Data) < (t.Cols-1)*t.Stride+t.Rows {
|
||||
panic("blas64: short data slice")
|
||||
}
|
||||
for i := 0; i < a.Rows; i++ {
|
||||
for j, v := range a.Data[i*a.Stride : i*a.Stride+a.Cols] {
|
||||
t.Data[i+j*t.Stride] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions as a and have adequate backing
|
||||
// data storage.
|
||||
func (t General) From(a GeneralCols) {
|
||||
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if len(t.Data) < (t.Rows-1)*t.Stride+t.Cols {
|
||||
panic("blas64: short data slice")
|
||||
}
|
||||
for j := 0; j < a.Cols; j++ {
|
||||
for i, v := range a.Data[j*a.Stride : j*a.Stride+a.Rows] {
|
||||
t.Data[i*t.Stride+j] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TriangularCols represents a matrix using the conventional column-major storage scheme.
|
||||
type TriangularCols Triangular
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions, uplo and diag as a and have
|
||||
// adequate backing data storage.
|
||||
func (t TriangularCols) From(a Triangular) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
if t.Diag != a.Diag {
|
||||
panic("blas64: mismatched BLAS diag")
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := i; j < a.N; j++ {
|
||||
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||
}
|
||||
}
|
||||
case blas.Lower:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := 0; j <= i; j++ {
|
||||
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||
}
|
||||
}
|
||||
case blas.All:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := 0; j < a.N; j++ {
|
||||
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions, uplo and diag as a and have
|
||||
// adequate backing data storage.
|
||||
func (t Triangular) From(a TriangularCols) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
if t.Diag != a.Diag {
|
||||
panic("blas64: mismatched BLAS diag")
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := i; j < a.N; j++ {
|
||||
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||
}
|
||||
}
|
||||
case blas.Lower:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := 0; j <= i; j++ {
|
||||
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||
}
|
||||
}
|
||||
case blas.All:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := 0; j < a.N; j++ {
|
||||
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BandCols represents a matrix using the band column-major storage scheme.
|
||||
type BandCols Band
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions and bandwidth as a and have
|
||||
// adequate backing data storage.
|
||||
func (t BandCols) From(a Band) {
|
||||
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.KL != a.KL || t.KU != a.KU {
|
||||
panic("blas64: mismatched bandwidth")
|
||||
}
|
||||
if a.Stride < a.KL+a.KU+1 {
|
||||
panic("blas64: short stride for source")
|
||||
}
|
||||
if t.Stride < t.KL+t.KU+1 {
|
||||
panic("blas64: short stride for destination")
|
||||
}
|
||||
for i := 0; i < a.Rows; i++ {
|
||||
for j := max(0, i-a.KL); j < min(i+a.KU+1, a.Cols); j++ {
|
||||
t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions and bandwidth as a and have
|
||||
// adequate backing data storage.
|
||||
func (t Band) From(a BandCols) {
|
||||
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.KL != a.KL || t.KU != a.KU {
|
||||
panic("blas64: mismatched bandwidth")
|
||||
}
|
||||
if a.Stride < a.KL+a.KU+1 {
|
||||
panic("blas64: short stride for source")
|
||||
}
|
||||
if t.Stride < t.KL+t.KU+1 {
|
||||
panic("blas64: short stride for destination")
|
||||
}
|
||||
for j := 0; j < a.Cols; j++ {
|
||||
for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
|
||||
t.Data[j+a.KL-i+i*a.Stride] = a.Data[i+t.KU-j+j*t.Stride]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TriangularBandCols represents a symmetric matrix using the band column-major storage scheme.
|
||||
type TriangularBandCols TriangularBand
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions, bandwidth and uplo as a and
|
||||
// have adequate backing data storage.
|
||||
func (t TriangularBandCols) From(a TriangularBand) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.K != a.K {
|
||||
panic("blas64: mismatched bandwidth")
|
||||
}
|
||||
if a.Stride < a.K+1 {
|
||||
panic("blas64: short stride for source")
|
||||
}
|
||||
if t.Stride < t.K+1 {
|
||||
panic("blas64: short stride for destination")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
if t.Diag != a.Diag {
|
||||
panic("blas64: mismatched BLAS diag")
|
||||
}
|
||||
dst := BandCols{
|
||||
Rows: t.N, Cols: t.N,
|
||||
Stride: t.Stride,
|
||||
Data: t.Data,
|
||||
}
|
||||
src := Band{
|
||||
Rows: a.N, Cols: a.N,
|
||||
Stride: a.Stride,
|
||||
Data: a.Data,
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
dst.KU = t.K
|
||||
src.KU = a.K
|
||||
case blas.Lower:
|
||||
dst.KL = t.K
|
||||
src.KL = a.K
|
||||
}
|
||||
dst.From(src)
|
||||
}
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions, bandwidth and uplo as a and
|
||||
// have adequate backing data storage.
|
||||
func (t TriangularBand) From(a TriangularBandCols) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.K != a.K {
|
||||
panic("blas64: mismatched bandwidth")
|
||||
}
|
||||
if a.Stride < a.K+1 {
|
||||
panic("blas64: short stride for source")
|
||||
}
|
||||
if t.Stride < t.K+1 {
|
||||
panic("blas64: short stride for destination")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
if t.Diag != a.Diag {
|
||||
panic("blas64: mismatched BLAS diag")
|
||||
}
|
||||
dst := Band{
|
||||
Rows: t.N, Cols: t.N,
|
||||
Stride: t.Stride,
|
||||
Data: t.Data,
|
||||
}
|
||||
src := BandCols{
|
||||
Rows: a.N, Cols: a.N,
|
||||
Stride: a.Stride,
|
||||
Data: a.Data,
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
dst.KU = t.K
|
||||
src.KU = a.K
|
||||
case blas.Lower:
|
||||
dst.KL = t.K
|
||||
src.KL = a.K
|
||||
}
|
||||
dst.From(src)
|
||||
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func max(a, b int) int {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package blas64
|
||||
|
||||
import "gonum.org/v1/gonum/blas"
|
||||
|
||||
// SymmetricCols represents a matrix using the conventional column-major storage scheme.
|
||||
type SymmetricCols Symmetric
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions and uplo as a and have adequate
|
||||
// backing data storage.
|
||||
func (t SymmetricCols) From(a Symmetric) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := i; j < a.N; j++ {
|
||||
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||
}
|
||||
}
|
||||
case blas.Lower:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := 0; j <= i; j++ {
|
||||
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions and uplo as a and have adequate
|
||||
// backing data storage.
|
||||
func (t Symmetric) From(a SymmetricCols) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := i; j < a.N; j++ {
|
||||
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||
}
|
||||
}
|
||||
case blas.Lower:
|
||||
for i := 0; i < a.N; i++ {
|
||||
for j := 0; j <= i; j++ {
|
||||
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme.
|
||||
type SymmetricBandCols SymmetricBand
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions, bandwidth and uplo as a and
|
||||
// have adequate backing data storage.
|
||||
func (t SymmetricBandCols) From(a SymmetricBand) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.K != a.K {
|
||||
panic("blas64: mismatched bandwidth")
|
||||
}
|
||||
if a.Stride < a.K+1 {
|
||||
panic("blas64: short stride for source")
|
||||
}
|
||||
if t.Stride < t.K+1 {
|
||||
panic("blas64: short stride for destination")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
dst := BandCols{
|
||||
Rows: t.N, Cols: t.N,
|
||||
Stride: t.Stride,
|
||||
Data: t.Data,
|
||||
}
|
||||
src := Band{
|
||||
Rows: a.N, Cols: a.N,
|
||||
Stride: a.Stride,
|
||||
Data: a.Data,
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
dst.KU = t.K
|
||||
src.KU = a.K
|
||||
case blas.Lower:
|
||||
dst.KL = t.K
|
||||
src.KL = a.K
|
||||
}
|
||||
dst.From(src)
|
||||
}
|
||||
|
||||
// From fills the receiver with elements from a. The receiver
|
||||
// must have the same dimensions, bandwidth and uplo as a and
|
||||
// have adequate backing data storage.
|
||||
func (t SymmetricBand) From(a SymmetricBandCols) {
|
||||
if t.N != a.N {
|
||||
panic("blas64: mismatched dimension")
|
||||
}
|
||||
if t.K != a.K {
|
||||
panic("blas64: mismatched bandwidth")
|
||||
}
|
||||
if a.Stride < a.K+1 {
|
||||
panic("blas64: short stride for source")
|
||||
}
|
||||
if t.Stride < t.K+1 {
|
||||
panic("blas64: short stride for destination")
|
||||
}
|
||||
if t.Uplo != a.Uplo {
|
||||
panic("blas64: mismatched BLAS uplo")
|
||||
}
|
||||
dst := Band{
|
||||
Rows: t.N, Cols: t.N,
|
||||
Stride: t.Stride,
|
||||
Data: t.Data,
|
||||
}
|
||||
src := BandCols{
|
||||
Rows: a.N, Cols: a.N,
|
||||
Stride: a.Stride,
|
||||
Data: a.Data,
|
||||
}
|
||||
switch a.Uplo {
|
||||
default:
|
||||
panic("blas64: bad BLAS uplo")
|
||||
case blas.Upper:
|
||||
dst.KU = t.K
|
||||
src.KU = a.K
|
||||
case blas.Lower:
|
||||
dst.KL = t.K
|
||||
src.KL = a.K
|
||||
}
|
||||
dst.From(src)
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package blas64 provides a simple interface to the float64 BLAS API.
|
||||
package blas64 // import "gonum.org/v1/gonum/blas/blas64"
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package blas provides interfaces for the BLAS linear algebra standard.
|
||||
|
||||
All methods must perform appropriate parameter checking and panic if
|
||||
provided parameters that do not conform to the requirements specified
|
||||
by the BLAS standard.
|
||||
|
||||
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
|
||||
|
||||
This version is modified to remove the "order" option. All matrix operations are
|
||||
on row-order matrices.
|
||||
|
||||
Level 1 BLAS
|
||||
|
||||
dim scalar vector vector scalars 5-element prefixes
|
||||
struct
|
||||
|
||||
_rotg ( a, b ) S, D
|
||||
_rotmg( d1, d2, a, b ) S, D
|
||||
_rot ( n, x, incX, y, incY, c, s ) S, D
|
||||
_rotm ( n, x, incX, y, incY, param ) S, D
|
||||
_swap ( n, x, incX, y, incY ) S, D, C, Z
|
||||
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
|
||||
_copy ( n, x, incX, y, incY ) S, D, C, Z
|
||||
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
|
||||
_dot ( n, x, incX, y, incY ) S, D, Ds
|
||||
_dotu ( n, x, incX, y, incY ) C, Z
|
||||
_dotc ( n, x, incX, y, incY ) C, Z
|
||||
__dot ( n, alpha, x, incX, y, incY ) Sds
|
||||
_nrm2 ( n, x, incX ) S, D, Sc, Dz
|
||||
_asum ( n, x, incX ) S, D, Sc, Dz
|
||||
I_amax( n, x, incX ) s, d, c, z
|
||||
|
||||
Level 2 BLAS
|
||||
|
||||
options dim b-width scalar matrix vector scalar vector prefixes
|
||||
|
||||
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
||||
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
||||
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
||||
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
||||
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
|
||||
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
||||
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
||||
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
|
||||
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
||||
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
||||
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
||||
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
||||
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
||||
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
||||
|
||||
options dim scalar vector vector matrix prefixes
|
||||
|
||||
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
|
||||
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
|
||||
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
|
||||
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
|
||||
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
|
||||
_spr ( uplo, n, alpha, x, incX, ap ) S, D
|
||||
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
|
||||
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
|
||||
|
||||
Level 3 BLAS
|
||||
|
||||
options dim scalar matrix matrix scalar matrix prefixes
|
||||
|
||||
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
||||
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
|
||||
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
|
||||
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
||||
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
||||
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
||||
|
||||
Meaning of prefixes
|
||||
|
||||
S - float32 C - complex64
|
||||
D - float64 Z - complex128
|
||||
|
||||
Matrix types
|
||||
|
||||
GE - GEneral GB - General Band
|
||||
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
|
||||
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
|
||||
TR - TRiangular TB - Triangular Band TP - Triangular Packed
|
||||
|
||||
Options
|
||||
|
||||
trans = NoTrans, Trans, ConjTrans
|
||||
uplo = Upper, Lower
|
||||
diag = Nonunit, Unit
|
||||
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
|
||||
|
||||
For real matrices, Trans and ConjTrans have the same meaning.
|
||||
For Hermitian matrices, trans = Trans is not allowed.
|
||||
For complex symmetric matrices, trans = ConjTrans is not allowed.
|
||||
*/
|
||||
package blas // import "gonum.org/v1/gonum/blas"
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gonum
|
||||
|
||||
import "gonum.org/v1/gonum/blas"
|
||||
|
||||
var (
|
||||
_ blas.Complex64 = Implementation{}
|
||||
_ blas.Complex128 = Implementation{}
|
||||
)
|
||||
|
||||
// TODO(btracey): Replace this as complex routines are added, and instead
|
||||
// automatically generate the complex64 routines from the complex128 ones.
|
||||
|
||||
var noComplex = "native: implementation does not implement this routine, see the cgo wrapper in gonum.org/v1/netlib/blas"
|
||||
|
||||
// Level 1 complex64 routines.
|
||||
|
||||
func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Scasum(n int, x []complex64, incX int) float32 {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Icamax(n int, x []complex64, incX int) int {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
|
||||
// Level 2 complex64 routines.
|
||||
|
||||
func (Implementation) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cgbmv(tA blas.Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Chpmv(ul blas.Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) {
|
||||
panic(noComplex)
|
||||
}
|
||||
|
||||
// Level 3 complex64 routines.
|
||||
|
||||
func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
|
||||
// Level 2 complex128 routines.
|
||||
|
||||
func (Implementation) Zgbmv(tA blas.Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
|
||||
// Level 3 complex128 routines.
|
||||
|
||||
func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
func (Implementation) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) {
|
||||
panic(noComplex)
|
||||
}
|
||||
|
|
@ -1,15 +1,15 @@
|
|||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f64"
|
||||
)
|
||||
|
||||
// Dgemm computes
|
||||
|
|
@ -25,17 +25,17 @@ func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a
|
|||
}
|
||||
aTrans := tA == blas.Trans || tA == blas.ConjTrans
|
||||
if aTrans {
|
||||
checkMatrix64(k, m, a, lda)
|
||||
checkDMatrix('a', k, m, a, lda)
|
||||
} else {
|
||||
checkMatrix64(m, k, a, lda)
|
||||
checkDMatrix('a', m, k, a, lda)
|
||||
}
|
||||
bTrans := tB == blas.Trans || tB == blas.ConjTrans
|
||||
if bTrans {
|
||||
checkMatrix64(n, k, b, ldb)
|
||||
checkDMatrix('b', n, k, b, ldb)
|
||||
} else {
|
||||
checkMatrix64(k, n, b, ldb)
|
||||
checkDMatrix('b', k, n, b, ldb)
|
||||
}
|
||||
checkMatrix64(m, n, c, ldc)
|
||||
checkDMatrix('c', m, n, c, ldc)
|
||||
|
||||
// scale c
|
||||
if beta != 1 {
|
||||
|
|
@ -121,7 +121,7 @@ func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []f
|
|||
go func() {
|
||||
defer wg.Done()
|
||||
// Make local copies of otherwise global variables to reduce shared memory.
|
||||
// This has a noticable effect on benchmarks in some cases.
|
||||
// This has a noticeable effect on benchmarks in some cases.
|
||||
alpha := alpha
|
||||
aTrans := aTrans
|
||||
bTrans := bTrans
|
||||
|
|
@ -259,18 +259,3 @@ func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb i
|
|||
func sliceView64(a []float64, lda, i, j, r, c int) []float64 {
|
||||
return a[i*lda+j : (i+r-1)*lda+j+c]
|
||||
}
|
||||
|
||||
func checkMatrix64(m, n int, a []float64, lda int) {
|
||||
if m < 0 {
|
||||
panic("blas: rows < 0")
|
||||
}
|
||||
if n < 0 {
|
||||
panic("blas: cols < 0")
|
||||
}
|
||||
if lda < n {
|
||||
panic("blas: illegal stride")
|
||||
}
|
||||
if len(a) < (m-1)*lda+n {
|
||||
panic("blas: insufficient matrix slice length")
|
||||
}
|
||||
}
|
||||
|
|
@ -7,12 +7,12 @@
|
|||
/*
|
||||
Package native is a Go implementation of the BLAS API. This implementation
|
||||
panics when the input arguments are invalid as per the standard, for example
|
||||
if a vector increment is zero. Please note that the treatment of NaN values
|
||||
if a vector increment is zero. Note that the treatment of NaN values
|
||||
is not specified, and differs among the BLAS implementations.
|
||||
github.com/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
|
||||
gonum.org/v1/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
|
||||
interface. The rest of this text describes the layout of the data for the input types.
|
||||
|
||||
Please note that in the function documentation, x[i] refers to the i^th element
|
||||
Note that in the function documentation, x[i] refers to the i^th element
|
||||
of the vector, which will be different from the i^th element of the slice if
|
||||
incX != 1.
|
||||
|
||||
|
|
@ -85,4 +85,4 @@ which is given to the BLAS routine as [∗ 1 2 3 4 ...].
|
|||
See http://www.crest.iu.edu/research/mtl/reference/html/banded.html
|
||||
for more information
|
||||
*/
|
||||
package native
|
||||
package gonum // import "gonum.org/v1/gonum/blas/gonum"
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
type general64 struct {
|
||||
data []float64
|
||||
rows, cols int
|
||||
stride int
|
||||
}
|
||||
|
||||
func (g general64) clone() general64 {
|
||||
data := make([]float64, len(g.data))
|
||||
copy(data, g.data)
|
||||
return general64{
|
||||
data: data,
|
||||
rows: g.rows,
|
||||
cols: g.cols,
|
||||
stride: g.stride,
|
||||
}
|
||||
}
|
||||
|
||||
func (g general64) equal(a general64) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if a.data[i] != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (g general64) equalWithinAbs(a general64, tol float64) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if math.Abs(a.data[i]-v) > tol {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gonum
|
||||
|
||||
import (
|
||||
math "gonum.org/v1/gonum/internal/math32"
|
||||
)
|
||||
|
||||
type general32 struct {
|
||||
data []float32
|
||||
rows, cols int
|
||||
stride int
|
||||
}
|
||||
|
||||
func (g general32) clone() general32 {
|
||||
data := make([]float32, len(g.data))
|
||||
copy(data, g.data)
|
||||
return general32{
|
||||
data: data,
|
||||
rows: g.rows,
|
||||
cols: g.cols,
|
||||
stride: g.stride,
|
||||
}
|
||||
}
|
||||
|
||||
func (g general32) equal(a general32) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if a.data[i] != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (g general32) equalWithinAbs(a general32, tol float32) bool {
|
||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||
return false
|
||||
}
|
||||
for i, v := range g.data {
|
||||
if math.Abs(a.data[i]-v) > tol {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate ./single_precision.bash
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import "math"
|
||||
|
||||
type Implementation struct{}
|
||||
|
||||
|
|
@ -43,9 +45,6 @@ const (
|
|||
buffMul = 4 // how big is the buffer relative to the number of workers
|
||||
)
|
||||
|
||||
// [SD]gemm debugging constant.
|
||||
const debug = false
|
||||
|
||||
// subMul is a common type shared by [SD]gemm.
|
||||
type subMul struct {
|
||||
i, j int // index of block
|
||||
|
|
@ -65,8 +64,70 @@ func min(a, b int) int {
|
|||
return a
|
||||
}
|
||||
|
||||
// blocks returns the number of divisons of the dimension length with the given
|
||||
func checkSMatrix(name byte, m, n int, a []float32, lda int) {
|
||||
if m < 0 {
|
||||
panic(mLT0)
|
||||
}
|
||||
if n < 0 {
|
||||
panic(nLT0)
|
||||
}
|
||||
if lda < n {
|
||||
panic("blas: illegal stride of " + string(name))
|
||||
}
|
||||
if len(a) < (m-1)*lda+n {
|
||||
panic("blas: index of " + string(name) + " out of range")
|
||||
}
|
||||
}
|
||||
|
||||
func checkDMatrix(name byte, m, n int, a []float64, lda int) {
|
||||
if m < 0 {
|
||||
panic(mLT0)
|
||||
}
|
||||
if n < 0 {
|
||||
panic(nLT0)
|
||||
}
|
||||
if lda < n {
|
||||
panic("blas: illegal stride of " + string(name))
|
||||
}
|
||||
if len(a) < (m-1)*lda+n {
|
||||
panic("blas: index of " + string(name) + " out of range")
|
||||
}
|
||||
}
|
||||
|
||||
func checkZMatrix(name byte, m, n int, a []complex128, lda int) {
|
||||
if m < 0 {
|
||||
panic(mLT0)
|
||||
}
|
||||
if n < 0 {
|
||||
panic(nLT0)
|
||||
}
|
||||
if lda < max(1, n) {
|
||||
panic("blas: illegal stride of " + string(name))
|
||||
}
|
||||
if len(a) < (m-1)*lda+n {
|
||||
panic("blas: insufficient " + string(name) + " matrix slice length")
|
||||
}
|
||||
}
|
||||
|
||||
func checkZVector(name byte, n int, x []complex128, incX int) {
|
||||
if n < 0 {
|
||||
panic(nLT0)
|
||||
}
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||
panic("blas: insufficient " + string(name) + " vector slice length")
|
||||
}
|
||||
}
|
||||
|
||||
// blocks returns the number of divisions of the dimension length with the given
|
||||
// block size.
|
||||
func blocks(dim, bsize int) int {
|
||||
return (dim + bsize - 1) / bsize
|
||||
}
|
||||
|
||||
// dcabs1 returns |real(z)|+|imag(z)|.
|
||||
func dcabs1(z complex128) float64 {
|
||||
return math.Abs(real(z)) + math.Abs(imag(z))
|
||||
}
|
||||
|
|
@ -0,0 +1,442 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"gonum.org/v1/gonum/internal/asm/c128"
|
||||
)
|
||||
|
||||
// Dzasum returns the sum of the absolute values of the elements of x
|
||||
// \sum_i |Re(x[i])| + |Im(x[i])|
|
||||
// Dzasum returns 0 if incX is negative.
|
||||
func (Implementation) Dzasum(n int, x []complex128, incX int) float64 {
|
||||
if n < 0 {
|
||||
panic(negativeN)
|
||||
}
|
||||
if incX < 1 {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
var sum float64
|
||||
if incX == 1 {
|
||||
if len(x) < n {
|
||||
panic(badX)
|
||||
}
|
||||
for _, v := range x[:n] {
|
||||
sum += dcabs1(v)
|
||||
}
|
||||
return sum
|
||||
}
|
||||
if (n-1)*incX >= len(x) {
|
||||
panic(badX)
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
v := x[i*incX]
|
||||
sum += dcabs1(v)
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// Dznrm2 computes the Euclidean norm of the complex vector x,
|
||||
// ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])).
|
||||
// This function returns 0 if incX is negative.
|
||||
func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 {
|
||||
if incX < 1 {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
if n < 1 {
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if (n-1)*incX >= len(x) {
|
||||
panic(badX)
|
||||
}
|
||||
var (
|
||||
scale float64
|
||||
ssq float64 = 1
|
||||
)
|
||||
if incX == 1 {
|
||||
for _, v := range x[:n] {
|
||||
re, im := math.Abs(real(v)), math.Abs(imag(v))
|
||||
if re != 0 {
|
||||
if re > scale {
|
||||
ssq = 1 + ssq*(scale/re)*(scale/re)
|
||||
scale = re
|
||||
} else {
|
||||
ssq += (re / scale) * (re / scale)
|
||||
}
|
||||
}
|
||||
if im != 0 {
|
||||
if im > scale {
|
||||
ssq = 1 + ssq*(scale/im)*(scale/im)
|
||||
scale = im
|
||||
} else {
|
||||
ssq += (im / scale) * (im / scale)
|
||||
}
|
||||
}
|
||||
}
|
||||
if math.IsInf(scale, 1) {
|
||||
return math.Inf(1)
|
||||
}
|
||||
return scale * math.Sqrt(ssq)
|
||||
}
|
||||
for ix := 0; ix < n*incX; ix += incX {
|
||||
re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix]))
|
||||
if re != 0 {
|
||||
if re > scale {
|
||||
ssq = 1 + ssq*(scale/re)*(scale/re)
|
||||
scale = re
|
||||
} else {
|
||||
ssq += (re / scale) * (re / scale)
|
||||
}
|
||||
}
|
||||
if im != 0 {
|
||||
if im > scale {
|
||||
ssq = 1 + ssq*(scale/im)*(scale/im)
|
||||
scale = im
|
||||
} else {
|
||||
ssq += (im / scale) * (im / scale)
|
||||
}
|
||||
}
|
||||
}
|
||||
if math.IsInf(scale, 1) {
|
||||
return math.Inf(1)
|
||||
}
|
||||
return scale * math.Sqrt(ssq)
|
||||
}
|
||||
|
||||
// Izamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|.
|
||||
// Izamax returns -1 if n is 0 or incX is negative.
|
||||
func (Implementation) Izamax(n int, x []complex128, incX int) int {
|
||||
if incX < 1 {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
// Return invalid index.
|
||||
return -1
|
||||
}
|
||||
if n < 1 {
|
||||
if n == 0 {
|
||||
// Return invalid index.
|
||||
return -1
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if len(x) <= (n-1)*incX {
|
||||
panic(badX)
|
||||
}
|
||||
idx := 0
|
||||
max := dcabs1(x[0])
|
||||
if incX == 1 {
|
||||
for i, v := range x[1:n] {
|
||||
absV := dcabs1(v)
|
||||
if absV > max {
|
||||
max = absV
|
||||
idx = i + 1
|
||||
}
|
||||
}
|
||||
return idx
|
||||
}
|
||||
ix := incX
|
||||
for i := 1; i < n; i++ {
|
||||
absV := dcabs1(x[ix])
|
||||
if absV > max {
|
||||
max = absV
|
||||
idx = i
|
||||
}
|
||||
ix += incX
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
// Zaxpy adds alpha times x to y:
|
||||
// y[i] += alpha * x[i] for all i
|
||||
func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
if incY == 0 {
|
||||
panic(zeroIncY)
|
||||
}
|
||||
if n < 1 {
|
||||
if n == 0 {
|
||||
return
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||
panic(badX)
|
||||
}
|
||||
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
|
||||
panic(badY)
|
||||
}
|
||||
if alpha == 0 {
|
||||
return
|
||||
}
|
||||
if incX == 1 && incY == 1 {
|
||||
c128.AxpyUnitary(alpha, x[:n], y[:n])
|
||||
return
|
||||
}
|
||||
var ix, iy int
|
||||
if incX < 0 {
|
||||
ix = (1 - n) * incX
|
||||
}
|
||||
if incY < 0 {
|
||||
iy = (1 - n) * incY
|
||||
}
|
||||
c128.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||
}
|
||||
|
||||
// Zcopy copies the vector x to vector y.
|
||||
func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
if incY == 0 {
|
||||
panic(zeroIncY)
|
||||
}
|
||||
if n < 1 {
|
||||
if n == 0 {
|
||||
return
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||
panic(badX)
|
||||
}
|
||||
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
|
||||
panic(badY)
|
||||
}
|
||||
if incX == 1 && incY == 1 {
|
||||
copy(y[:n], x[:n])
|
||||
return
|
||||
}
|
||||
var ix, iy int
|
||||
if incX < 0 {
|
||||
ix = (-n + 1) * incX
|
||||
}
|
||||
if incY < 0 {
|
||||
iy = (-n + 1) * incY
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
y[iy] = x[ix]
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
}
|
||||
|
||||
// Zdotc computes the dot product
|
||||
// x^H · y
|
||||
// of two complex vectors x and y.
|
||||
func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
if incY == 0 {
|
||||
panic(zeroIncY)
|
||||
}
|
||||
if n <= 0 {
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if incX == 1 && incY == 1 {
|
||||
if len(x) < n {
|
||||
panic(badX)
|
||||
}
|
||||
if len(y) < n {
|
||||
panic(badY)
|
||||
}
|
||||
return c128.DotcUnitary(x[:n], y[:n])
|
||||
}
|
||||
var ix, iy int
|
||||
if incX < 0 {
|
||||
ix = (-n + 1) * incX
|
||||
}
|
||||
if incY < 0 {
|
||||
iy = (-n + 1) * incY
|
||||
}
|
||||
if ix >= len(x) || (n-1)*incX >= len(x) {
|
||||
panic(badX)
|
||||
}
|
||||
if iy >= len(y) || (n-1)*incY >= len(y) {
|
||||
panic(badY)
|
||||
}
|
||||
return c128.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||
}
|
||||
|
||||
// Zdotu computes the dot product
|
||||
// x^T · y
|
||||
// of two complex vectors x and y.
|
||||
func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
if incY == 0 {
|
||||
panic(zeroIncY)
|
||||
}
|
||||
if n <= 0 {
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if incX == 1 && incY == 1 {
|
||||
if len(x) < n {
|
||||
panic(badX)
|
||||
}
|
||||
if len(y) < n {
|
||||
panic(badY)
|
||||
}
|
||||
return c128.DotuUnitary(x[:n], y[:n])
|
||||
}
|
||||
var ix, iy int
|
||||
if incX < 0 {
|
||||
ix = (-n + 1) * incX
|
||||
}
|
||||
if incY < 0 {
|
||||
iy = (-n + 1) * incY
|
||||
}
|
||||
if ix >= len(x) || (n-1)*incX >= len(x) {
|
||||
panic(badX)
|
||||
}
|
||||
if iy >= len(y) || (n-1)*incY >= len(y) {
|
||||
panic(badY)
|
||||
}
|
||||
return c128.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||
}
|
||||
|
||||
// Zdscal scales the vector x by a real scalar alpha.
|
||||
// Zdscal has no effect if incX < 0.
|
||||
func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) {
|
||||
if incX < 1 {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
return
|
||||
}
|
||||
if (n-1)*incX >= len(x) {
|
||||
panic(badX)
|
||||
}
|
||||
if n < 1 {
|
||||
if n == 0 {
|
||||
return
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if alpha == 0 {
|
||||
if incX == 1 {
|
||||
x = x[:n]
|
||||
for i := range x {
|
||||
x[i] = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
for ix := 0; ix < n*incX; ix += incX {
|
||||
x[ix] = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
if incX == 1 {
|
||||
x = x[:n]
|
||||
for i, v := range x {
|
||||
x[i] = complex(alpha*real(v), alpha*imag(v))
|
||||
}
|
||||
return
|
||||
}
|
||||
for ix := 0; ix < n*incX; ix += incX {
|
||||
v := x[ix]
|
||||
x[ix] = complex(alpha*real(v), alpha*imag(v))
|
||||
}
|
||||
}
|
||||
|
||||
// Zscal scales the vector x by a complex scalar alpha.
|
||||
// Zscal has no effect if incX < 0.
|
||||
func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) {
|
||||
if incX < 1 {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
return
|
||||
}
|
||||
if (n-1)*incX >= len(x) {
|
||||
panic(badX)
|
||||
}
|
||||
if n < 1 {
|
||||
if n == 0 {
|
||||
return
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if alpha == 0 {
|
||||
if incX == 1 {
|
||||
x = x[:n]
|
||||
for i := range x {
|
||||
x[i] = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
for ix := 0; ix < n*incX; ix += incX {
|
||||
x[ix] = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
if incX == 1 {
|
||||
c128.ScalUnitary(alpha, x[:n])
|
||||
return
|
||||
}
|
||||
c128.ScalInc(alpha, x, uintptr(n), uintptr(incX))
|
||||
}
|
||||
|
||||
// Zswap exchanges the elements of two complex vectors x and y.
|
||||
func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) {
|
||||
if incX == 0 {
|
||||
panic(zeroIncX)
|
||||
}
|
||||
if incY == 0 {
|
||||
panic(zeroIncY)
|
||||
}
|
||||
if n < 1 {
|
||||
if n == 0 {
|
||||
return
|
||||
}
|
||||
panic(negativeN)
|
||||
}
|
||||
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||
panic(badX)
|
||||
}
|
||||
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
|
||||
panic(badY)
|
||||
}
|
||||
if incX == 1 && incY == 1 {
|
||||
x = x[:n]
|
||||
for i, v := range x {
|
||||
x[i], y[i] = y[i], v
|
||||
}
|
||||
return
|
||||
}
|
||||
var ix, iy int
|
||||
if incX < 0 {
|
||||
ix = (-n + 1) * incX
|
||||
}
|
||||
if incY < 0 {
|
||||
iy = (-n + 1) * incY
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
x[ix], y[iy] = y[iy], x[ix]
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
}
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f64"
|
||||
)
|
||||
|
||||
var _ blas.Float64Level1 = Implementation{}
|
||||
|
|
@ -270,13 +270,7 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
|
|||
return
|
||||
}
|
||||
if incX == 1 && incY == 1 {
|
||||
if len(x) < n {
|
||||
panic(badLenX)
|
||||
}
|
||||
if len(y) < n {
|
||||
panic(badLenY)
|
||||
}
|
||||
f64.AxpyUnitaryTo(y, alpha, x[:n], y)
|
||||
f64.AxpyUnitary(alpha, x[:n], y[:n])
|
||||
return
|
||||
}
|
||||
var ix, iy int
|
||||
|
|
@ -286,12 +280,6 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
|
|||
if incY < 0 {
|
||||
iy = (-n + 1) * incY
|
||||
}
|
||||
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
|
||||
panic(badLenX)
|
||||
}
|
||||
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
|
||||
panic(badLenY)
|
||||
}
|
||||
f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||
}
|
||||
|
||||
|
|
@ -565,7 +553,6 @@ func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int,
|
|||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Dscal scales x by alpha.
|
||||
|
|
@ -604,7 +591,5 @@ func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) {
|
|||
f64.ScalUnitary(alpha, x[:n])
|
||||
return
|
||||
}
|
||||
for ix := 0; ix < n*incX; ix += incX {
|
||||
x[ix] *= alpha
|
||||
}
|
||||
f64.ScalInc(alpha, x, uintptr(n), uintptr(incX))
|
||||
}
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"gonum.org/v1/gonum/internal/asm/f64"
|
||||
)
|
||||
|
||||
// Ddot computes the dot product of the two vectors
|
||||
|
|
@ -1,16 +1,16 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
math "github.com/gonum/blas/native/internal/math32"
|
||||
math "gonum.org/v1/gonum/internal/math32"
|
||||
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f32"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f32"
|
||||
)
|
||||
|
||||
var _ blas.Float32Level1 = Implementation{}
|
||||
|
|
@ -284,13 +284,7 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
|
|||
return
|
||||
}
|
||||
if incX == 1 && incY == 1 {
|
||||
if len(x) < n {
|
||||
panic(badLenX)
|
||||
}
|
||||
if len(y) < n {
|
||||
panic(badLenY)
|
||||
}
|
||||
f32.AxpyUnitaryTo(y, alpha, x[:n], y)
|
||||
f32.AxpyUnitary(alpha, x[:n], y[:n])
|
||||
return
|
||||
}
|
||||
var ix, iy int
|
||||
|
|
@ -300,12 +294,6 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
|
|||
if incY < 0 {
|
||||
iy = (-n + 1) * incY
|
||||
}
|
||||
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
|
||||
panic(badLenX)
|
||||
}
|
||||
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
|
||||
panic(badLenY)
|
||||
}
|
||||
f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||
}
|
||||
|
||||
|
|
@ -587,7 +575,6 @@ func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int,
|
|||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Sscal scales x by alpha.
|
||||
|
|
@ -628,7 +615,5 @@ func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) {
|
|||
f32.ScalUnitary(alpha, x[:n])
|
||||
return
|
||||
}
|
||||
for ix := 0; ix < n*incX; ix += incX {
|
||||
x[ix] *= alpha
|
||||
}
|
||||
f32.ScalInc(alpha, x, uintptr(n), uintptr(incX))
|
||||
}
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/internal/asm/f32"
|
||||
"gonum.org/v1/gonum/internal/asm/f32"
|
||||
)
|
||||
|
||||
// Dsdot computes the dot product of the two vectors
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/internal/asm/f32"
|
||||
"gonum.org/v1/gonum/internal/asm/f32"
|
||||
)
|
||||
|
||||
// Sdot computes the dot product of the two vectors
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/internal/asm/f32"
|
||||
"gonum.org/v1/gonum/internal/asm/f32"
|
||||
)
|
||||
|
||||
// Sdsdot computes the dot product of the two vectors plus a constant
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,20 +1,20 @@
|
|||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f64"
|
||||
)
|
||||
|
||||
var _ blas.Float64Level2 = Implementation{}
|
||||
|
||||
// Dgemv computes
|
||||
// y = alpha * a * x + beta * y if tA = blas.NoTrans
|
||||
// y = alpha * A * x + beta * y if tA = blas.NoTrans
|
||||
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
|
||||
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
|
||||
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
|
||||
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
|
||||
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
|
||||
panic(badTranspose)
|
||||
|
|
@ -167,21 +167,14 @@ func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []f
|
|||
x = x[:m]
|
||||
y = y[:n]
|
||||
for i, xv := range x {
|
||||
tmp := alpha * xv
|
||||
if tmp != 0 {
|
||||
atmp := a[i*lda : i*lda+n]
|
||||
f64.AxpyUnitaryTo(atmp, tmp, y, atmp)
|
||||
}
|
||||
f64.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
ix := kx
|
||||
for i := 0; i < m; i++ {
|
||||
tmp := alpha * x[ix]
|
||||
if tmp != 0 {
|
||||
f64.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
||||
}
|
||||
f64.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
||||
ix += incX
|
||||
}
|
||||
}
|
||||
|
|
@ -230,7 +223,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
|||
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
|
||||
panic(badY)
|
||||
}
|
||||
if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
||||
if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
||||
panic(badLdA)
|
||||
}
|
||||
|
||||
|
|
@ -269,7 +262,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
|||
if tA == blas.NoTrans {
|
||||
iy := ky
|
||||
if incX == 1 {
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -284,7 +277,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
|||
}
|
||||
return
|
||||
}
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -301,7 +294,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
|||
return
|
||||
}
|
||||
if incX == 1 {
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -316,7 +309,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
|||
return
|
||||
}
|
||||
ix := kx
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -1530,7 +1523,6 @@ func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64,
|
|||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Dsyr performs the rank-one update
|
||||
|
|
@ -1718,7 +1710,6 @@ func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, inc
|
|||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Dtpsv solves
|
||||
|
|
@ -1,22 +1,22 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f32"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f32"
|
||||
)
|
||||
|
||||
var _ blas.Float32Level2 = Implementation{}
|
||||
|
||||
// Sgemv computes
|
||||
// y = alpha * a * x + beta * y if tA = blas.NoTrans
|
||||
// y = alpha * A * x + beta * y if tA = blas.NoTrans
|
||||
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
|
||||
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
|
||||
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
|
||||
//
|
||||
// Float32 implementations are autogenerated and not directly tested.
|
||||
func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
|
||||
|
|
@ -173,21 +173,14 @@ func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []f
|
|||
x = x[:m]
|
||||
y = y[:n]
|
||||
for i, xv := range x {
|
||||
tmp := alpha * xv
|
||||
if tmp != 0 {
|
||||
atmp := a[i*lda : i*lda+n]
|
||||
f32.AxpyUnitaryTo(atmp, tmp, y, atmp)
|
||||
}
|
||||
f32.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
ix := kx
|
||||
for i := 0; i < m; i++ {
|
||||
tmp := alpha * x[ix]
|
||||
if tmp != 0 {
|
||||
f32.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
||||
}
|
||||
f32.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
||||
ix += incX
|
||||
}
|
||||
}
|
||||
|
|
@ -238,7 +231,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
|||
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
|
||||
panic(badY)
|
||||
}
|
||||
if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
||||
if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
||||
panic(badLdA)
|
||||
}
|
||||
|
||||
|
|
@ -277,7 +270,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
|||
if tA == blas.NoTrans {
|
||||
iy := ky
|
||||
if incX == 1 {
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -292,7 +285,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
|||
}
|
||||
return
|
||||
}
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -309,7 +302,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
|||
return
|
||||
}
|
||||
if incX == 1 {
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -324,7 +317,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
|||
return
|
||||
}
|
||||
ix := kx
|
||||
for i := 0; i < m; i++ {
|
||||
for i := 0; i < min(m, n+kL); i++ {
|
||||
l := max(0, kL-i)
|
||||
u := min(nCol, ld+kL-i)
|
||||
off := max(0, i-kL)
|
||||
|
|
@ -1552,7 +1545,6 @@ func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32,
|
|||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Ssyr performs the rank-one update
|
||||
|
|
@ -1744,7 +1736,6 @@ func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, inc
|
|||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Stpsv solves
|
||||
|
|
@ -1,12 +1,12 @@
|
|||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f64"
|
||||
)
|
||||
|
||||
var _ blas.Float64Level3 = Implementation{}
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f32"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f32"
|
||||
)
|
||||
|
||||
var _ blas.Float32Level3 = Implementation{}
|
||||
|
|
@ -1,17 +1,17 @@
|
|||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
||||
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
|
||||
|
||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
package gonum
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/internal/asm/f32"
|
||||
"gonum.org/v1/gonum/blas"
|
||||
"gonum.org/v1/gonum/internal/asm/f32"
|
||||
)
|
||||
|
||||
// Sgemm computes
|
||||
|
|
@ -29,17 +29,17 @@ func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a
|
|||
}
|
||||
aTrans := tA == blas.Trans || tA == blas.ConjTrans
|
||||
if aTrans {
|
||||
checkMatrix32(k, m, a, lda)
|
||||
checkSMatrix('a', k, m, a, lda)
|
||||
} else {
|
||||
checkMatrix32(m, k, a, lda)
|
||||
checkSMatrix('a', m, k, a, lda)
|
||||
}
|
||||
bTrans := tB == blas.Trans || tB == blas.ConjTrans
|
||||
if bTrans {
|
||||
checkMatrix32(n, k, b, ldb)
|
||||
checkSMatrix('b', n, k, b, ldb)
|
||||
} else {
|
||||
checkMatrix32(k, n, b, ldb)
|
||||
checkSMatrix('b', k, n, b, ldb)
|
||||
}
|
||||
checkMatrix32(m, n, c, ldc)
|
||||
checkSMatrix('c', m, n, c, ldc)
|
||||
|
||||
// scale c
|
||||
if beta != 1 {
|
||||
|
|
@ -125,7 +125,7 @@ func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []f
|
|||
go func() {
|
||||
defer wg.Done()
|
||||
// Make local copies of otherwise global variables to reduce shared memory.
|
||||
// This has a noticable effect on benchmarks in some cases.
|
||||
// This has a noticeable effect on benchmarks in some cases.
|
||||
alpha := alpha
|
||||
aTrans := aTrans
|
||||
bTrans := bTrans
|
||||
|
|
@ -263,18 +263,3 @@ func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb i
|
|||
func sliceView32(a []float32, lda, i, j, r, c int) []float32 {
|
||||
return a[i*lda+j : (i+r-1)*lda+j+c]
|
||||
}
|
||||
|
||||
func checkMatrix32(m, n int, a []float32, lda int) {
|
||||
if m < 0 {
|
||||
panic("blas: rows < 0")
|
||||
}
|
||||
if n < 0 {
|
||||
panic("blas: cols < 0")
|
||||
}
|
||||
if lda < n {
|
||||
panic("blas: illegal stride")
|
||||
}
|
||||
if len(a) < (m-1)*lda+n {
|
||||
panic("blas: insufficient matrix slice length")
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package floats provides a set of helper routines for dealing with slices
|
||||
// of float64. The functions avoid allocations to allow for use within tight
|
||||
// loops without garbage collection overhead.
|
||||
//
|
||||
// The convention used is that when a slice is being modified in place, it has
|
||||
// the name dst.
|
||||
package floats // import "gonum.org/v1/gonum/floats"
|
||||
50
vendor/github.com/gonum/floats/floats.go → vendor/gonum.org/v1/gonum/floats/floats.go
generated
vendored
50
vendor/github.com/gonum/floats/floats.go → vendor/gonum.org/v1/gonum/floats/floats.go
generated
vendored
|
|
@ -2,20 +2,15 @@
|
|||
// Use of this code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file
|
||||
|
||||
// Package floats provides a set of helper routines for dealing with slices
|
||||
// of float64. The functions avoid allocations to allow for use within tight
|
||||
// loops without garbage collection overhead.
|
||||
//
|
||||
// The convention used is that when a slice is being modified in place, it has
|
||||
// the name dst.
|
||||
package floats
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"github.com/gonum/internal/asm/f64"
|
||||
"gonum.org/v1/gonum/internal/asm/f64"
|
||||
)
|
||||
|
||||
// Add adds, element-wise, the elements of s and dst, and stores in dst.
|
||||
|
|
@ -89,8 +84,8 @@ func (a argsort) Swap(i, j int) {
|
|||
a.inds[i], a.inds[j] = a.inds[j], a.inds[i]
|
||||
}
|
||||
|
||||
// Argsort sorts the elements of s while tracking their original order.
|
||||
// At the conclusion of Argsort, s will contain the original elements of s
|
||||
// Argsort sorts the elements of dst while tracking their original order.
|
||||
// At the conclusion of Argsort, dst will contain the original elements of dst
|
||||
// but sorted in increasing order, and inds will contain the original position
|
||||
// of the elements in the slice such that dst[i] = origDst[inds[i]].
|
||||
// It panics if the lengths of dst and inds do not match.
|
||||
|
|
@ -342,7 +337,6 @@ func EqualLengths(slices ...[]float64) bool {
|
|||
// all of the found elements will be returned along with an error.
|
||||
// At the return of the function, the input inds will be in an undetermined state.
|
||||
func Find(inds []int, f func(float64) bool, s []float64, k int) ([]int, error) {
|
||||
|
||||
// inds is also returned to allow for calling with nil
|
||||
|
||||
// Reslice inds to have zero length
|
||||
|
|
@ -495,6 +489,29 @@ func MulTo(dst, s, t []float64) []float64 {
|
|||
return dst
|
||||
}
|
||||
|
||||
const (
|
||||
nanBits = 0x7ff8000000000000
|
||||
nanMask = 0xfff8000000000000
|
||||
)
|
||||
|
||||
// NaNWith returns an IEEE 754 "quiet not-a-number" value with the
|
||||
// payload specified in the low 51 bits of payload.
|
||||
// The NaN returned by math.NaN has a bit pattern equal to NaNWith(1).
|
||||
func NaNWith(payload uint64) float64 {
|
||||
return math.Float64frombits(nanBits | (payload &^ nanMask))
|
||||
}
|
||||
|
||||
// NaNPayload returns the lowest 51 bits payload of an IEEE 754 "quiet
|
||||
// not-a-number". For values of f other than quiet-NaN, NaNPayload
|
||||
// returns zero and false.
|
||||
func NaNPayload(f float64) (payload uint64, ok bool) {
|
||||
b := math.Float64bits(f)
|
||||
if b&nanBits != nanBits {
|
||||
return 0, false
|
||||
}
|
||||
return b &^ nanMask, true
|
||||
}
|
||||
|
||||
// Nearest returns the index of the element in s
|
||||
// whose value is nearest to v. If several such
|
||||
// elements exist, the lowest index is returned.
|
||||
|
|
@ -567,6 +584,19 @@ func Norm(s []float64, L float64) float64 {
|
|||
return math.Pow(norm, 1/L)
|
||||
}
|
||||
|
||||
// ParseWithNA converts the string s to a float64 in v.
|
||||
// If s equals missing, w is returned as 0, otherwise 1.
|
||||
func ParseWithNA(s, missing string) (v, w float64, err error) {
|
||||
if s == missing {
|
||||
return 0, 0, nil
|
||||
}
|
||||
v, err = strconv.ParseFloat(s, 64)
|
||||
if err == nil {
|
||||
w = 1
|
||||
}
|
||||
return v, w, err
|
||||
}
|
||||
|
||||
// Prod returns the product of the elements of the slice.
|
||||
// Returns 1 if len(s) = 0.
|
||||
func Prod(s []float64) float64 {
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
TEXT ·AxpyInc(SB), NOSPLIT, $0
|
||||
MOVQ x_base+16(FP), SI // SI = &x
|
||||
MOVQ y_base+40(FP), DI // DI = &y
|
||||
MOVQ n+64(FP), CX // CX = n
|
||||
CMPQ CX, $0 // if n==0 { return }
|
||||
JE axpyi_end
|
||||
MOVQ ix+88(FP), R8 // R8 = ix // Load the first index
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ iy+96(FP), R9 // R9 = iy
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
|
||||
LEAQ (DI)(R9*1), DI // DI = &(y[iy])
|
||||
MOVQ DI, DX // DX = DI // Separate Read/Write pointers
|
||||
MOVQ incX+72(FP), R8 // R8 = incX
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ incY+80(FP), R9 // R9 = iy
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
|
||||
|
||||
axpyi_loop: // do {
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS (SI)(R8*1), X4
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
MOVUPS (SI), X6
|
||||
MOVUPS (SI)(R8*1), X8
|
||||
|
||||
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX), X3
|
||||
ADDPD (DX)(R9*1), X5
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
ADDPD (DX), X7
|
||||
ADDPD (DX)(R9*1), X9
|
||||
MOVUPS X3, (DI) // dst[i] = X_(i+1)
|
||||
MOVUPS X5, (DI)(R9*1)
|
||||
LEAQ (DI)(R9*2), DI
|
||||
MOVUPS X7, (DI)
|
||||
MOVUPS X9, (DI)(R9*1)
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
|
||||
DECQ BX
|
||||
JNZ axpyi_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE axpyi_end
|
||||
|
||||
axpyi_tail: // do {
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DI), X3
|
||||
MOVUPS X3, (DI) // y[i] = X_i
|
||||
ADDQ R8, SI // SI = &(SI[incX])
|
||||
ADDQ R9, DI // DI = &(DI[incY])
|
||||
LOOP axpyi_tail // } while --CX > 0
|
||||
|
||||
axpyi_end:
|
||||
RET
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
TEXT ·AxpyIncTo(SB), NOSPLIT, $0
|
||||
MOVQ dst_base+0(FP), DI // DI = &dst
|
||||
MOVQ x_base+56(FP), SI // SI = &x
|
||||
MOVQ y_base+80(FP), DX // DX = &y
|
||||
MOVQ n+104(FP), CX // CX = n
|
||||
CMPQ CX, $0 // if n==0 { return }
|
||||
JE axpyi_end
|
||||
MOVQ ix+128(FP), R8 // R8 = ix // Load the first index
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ iy+136(FP), R9 // R9 = iy
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
MOVQ idst+32(FP), R10 // R10 = idst
|
||||
SHLQ $4, R10 // R10 *= sizeof(complex128)
|
||||
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
|
||||
LEAQ (DX)(R9*1), DX // DX = &(y[iy])
|
||||
LEAQ (DI)(R10*1), DI // DI = &(dst[idst])
|
||||
MOVQ incX+112(FP), R8 // R8 = incX
|
||||
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||
MOVQ incY+120(FP), R9 // R9 = incY
|
||||
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||
MOVQ incDst+24(FP), R10 // R10 = incDst
|
||||
SHLQ $4, R10 // R10 *= sizeof(complex128)
|
||||
MOVUPS alpha+40(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
|
||||
|
||||
axpyi_loop: // do {
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS (SI)(R8*1), X4
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
|
||||
MOVUPS (SI), X6
|
||||
MOVUPS (SI)(R8*1), X8
|
||||
|
||||
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX), X3
|
||||
ADDPD (DX)(R9*1), X5
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
ADDPD (DX), X7
|
||||
ADDPD (DX)(R9*1), X9
|
||||
MOVUPS X3, (DI) // dst[i] = X_(i+1)
|
||||
MOVUPS X5, (DI)(R10*1)
|
||||
LEAQ (DI)(R10*2), DI
|
||||
MOVUPS X7, (DI)
|
||||
MOVUPS X9, (DI)(R10*1)
|
||||
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
|
||||
DECQ BX
|
||||
JNZ axpyi_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE axpyi_end
|
||||
|
||||
axpyi_tail: // do {
|
||||
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX), X3
|
||||
MOVUPS X3, (DI) // y[i] X_(i+1)
|
||||
ADDQ R8, SI // SI += incX
|
||||
ADDQ R9, DX // DX += incY
|
||||
ADDQ R10, DI // DI += incDst
|
||||
LOOP axpyi_tail // } while --CX > 0
|
||||
|
||||
axpyi_end:
|
||||
RET
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyUnitary(alpha complex128, x, y []complex128)
|
||||
TEXT ·AxpyUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_base+16(FP), SI // SI = &x
|
||||
MOVQ y_base+40(FP), DI // DI = &y
|
||||
MOVQ x_len+24(FP), CX // CX = min( len(x), len(y) )
|
||||
CMPQ y_len+48(FP), CX
|
||||
CMOVQLE y_len+48(FP), CX
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
PXOR X0, X0 // Clear work registers and cache-align loop
|
||||
PXOR X1, X1
|
||||
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
XORQ AX, AX // i = 0
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ caxy_tail // if BX == 0 { goto caxy_tail }
|
||||
|
||||
caxy_loop: // do {
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS 16(SI)(AX*8), X4
|
||||
MOVUPS 32(SI)(AX*8), X6
|
||||
MOVUPS 48(SI)(AX*8), X8
|
||||
|
||||
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DI)(AX*8), X3
|
||||
ADDPD 16(DI)(AX*8), X5
|
||||
ADDPD 32(DI)(AX*8), X7
|
||||
ADDPD 48(DI)(AX*8), X9
|
||||
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||
MOVUPS X5, 16(DI)(AX*8)
|
||||
MOVUPS X7, 32(DI)(AX*8)
|
||||
MOVUPS X9, 48(DI)(AX*8)
|
||||
ADDQ $8, AX // i += 8
|
||||
DECQ BX
|
||||
JNZ caxy_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
|
||||
caxy_tail: // do {
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DI)(AX*8), X3
|
||||
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||
ADDQ $2, AX // i += 2
|
||||
LOOP caxy_tail // } while --CX > 0
|
||||
|
||||
caxy_end:
|
||||
RET
|
||||
123
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
generated
vendored
Normal file
123
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
generated
vendored
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// MOVDDUP X2, X3
|
||||
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||
// MOVDDUP X4, X5
|
||||
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||
// MOVDDUP X6, X7
|
||||
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||
// MOVDDUP X8, X9
|
||||
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||
|
||||
// ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||
// ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||
// ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||
// ADDSUBPD X8, X9
|
||||
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||
|
||||
// func AxpyUnitaryTo(dst []complex128, alpha complex64, x, y []complex128)
|
||||
TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0
|
||||
MOVQ dst_base+0(FP), DI // DI = &dst
|
||||
MOVQ x_base+40(FP), SI // SI = &x
|
||||
MOVQ y_base+64(FP), DX // DX = &y
|
||||
MOVQ x_len+48(FP), CX // CX = min( len(x), len(y), len(dst) )
|
||||
CMPQ y_len+72(FP), CX
|
||||
CMOVQLE y_len+72(FP), CX
|
||||
CMPQ dst_len+8(FP), CX
|
||||
CMOVQLE dst_len+8(FP), CX
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
MOVUPS alpha+24(FP), X0 // X0 = { imag(a), real(a) }
|
||||
MOVAPS X0, X1
|
||||
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||
XORQ AX, AX // i = 0
|
||||
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||
MOVAPS X1, X11
|
||||
MOVQ CX, BX
|
||||
ANDQ $3, CX // CX = n % 4
|
||||
SHRQ $2, BX // BX = floor( n / 4 )
|
||||
JZ caxy_tail // if BX == 0 { goto caxy_tail }
|
||||
|
||||
caxy_loop: // do {
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS 16(SI)(AX*8), X4
|
||||
MOVUPS 32(SI)(AX*8), X6
|
||||
MOVUPS 48(SI)(AX*8), X8
|
||||
|
||||
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_X2_X3 // Load and duplicate imag elements (xi, xi)
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // duplicate real elements (xr, xr)
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X1, X2
|
||||
MULPD X0, X3
|
||||
MULPD X11, X4
|
||||
MULPD X10, X5
|
||||
MULPD X1, X6
|
||||
MULPD X0, X7
|
||||
MULPD X11, X8
|
||||
MULPD X10, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX)(AX*8), X3
|
||||
ADDPD 16(DX)(AX*8), X5
|
||||
ADDPD 32(DX)(AX*8), X7
|
||||
ADDPD 48(DX)(AX*8), X9
|
||||
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||
MOVUPS X5, 16(DI)(AX*8)
|
||||
MOVUPS X7, 32(DI)(AX*8)
|
||||
MOVUPS X9, 48(DI)(AX*8)
|
||||
ADDQ $8, AX // i += 8
|
||||
DECQ BX
|
||||
JNZ caxy_loop // } while --BX > 0
|
||||
CMPQ CX, $0 // if CX == 0 { return }
|
||||
JE caxy_end
|
||||
|
||||
caxy_tail: // Same calculation, but read in values to avoid trampling memory
|
||||
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||
ADDPD (DX)(AX*8), X3
|
||||
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||
ADDQ $2, AX // i += 2
|
||||
LOOP caxy_tail // } while --CX > 0
|
||||
|
||||
caxy_end:
|
||||
RET
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package c128 provides complex128 vector primitives.
|
||||
package c128 // import "gonum.org/v1/gonum/internal/asm/c128"
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
|
||||
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
|
||||
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
|
||||
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
|
||||
|
||||
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
|
||||
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
|
||||
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
|
||||
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define INC_X R8
|
||||
#define INCx3_X R9
|
||||
#define INC_Y R10
|
||||
#define INCx3_Y R11
|
||||
#define NEG1 X15
|
||||
#define P_NEG1 X14
|
||||
|
||||
// func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||
TEXT ·DotcInc(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ n+48(FP), LEN // LEN = n
|
||||
PXOR SUM, SUM // SUM = 0
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dot_end
|
||||
PXOR P_SUM, P_SUM // P_SUM = 0
|
||||
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
|
||||
SHLQ $4, INC_X
|
||||
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
|
||||
SHLQ $4, INC_Y
|
||||
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
|
||||
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
|
||||
MOVQ incX+56(FP), INC_X // INC_X = incX
|
||||
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
|
||||
MOVQ incY+64(FP), INC_Y // INC_Y = incY
|
||||
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
|
||||
MOVSD $(-1.0), NEG1
|
||||
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $3, TAIL // TAIL = n % 4
|
||||
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||
JZ dot_tail // if n <= 4 { goto dot_tail }
|
||||
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
|
||||
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
|
||||
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
|
||||
|
||||
dot_loop: // do {
|
||||
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_XPTR_INCX__X5
|
||||
MOVDDUP_XPTR_INCX_2__X7
|
||||
MOVDDUP_XPTR_INCx3X__X9
|
||||
|
||||
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MOVDDUP_8_XPTR_INCX__X4
|
||||
MOVDDUP_8_XPTR_INCX_2__X6
|
||||
MOVDDUP_8_XPTR_INCx3X__X8
|
||||
|
||||
// X_i = { -imag(x[i]), -imag(x[i]) }
|
||||
MULPD NEG1, X2
|
||||
MULPD P_NEG1, X4
|
||||
MULPD NEG1, X6
|
||||
MULPD P_NEG1, X8
|
||||
|
||||
// X_j = { imag(y[i]), real(y[i]) }
|
||||
MOVUPS (Y_PTR), X10
|
||||
MOVUPS (Y_PTR)(INC_Y*1), X11
|
||||
MOVUPS (Y_PTR)(INC_Y*2), X12
|
||||
MOVUPS (Y_PTR)(INCx3_Y*1), X13
|
||||
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X10, X3
|
||||
MULPD X11, X5
|
||||
MULPD X12, X7
|
||||
MULPD X13, X9
|
||||
|
||||
// X_j = { real(y[i]), imag(y[i]) }
|
||||
SHUFPD $0x1, X10, X10
|
||||
SHUFPD $0x1, X11, X11
|
||||
SHUFPD $0x1, X12, X12
|
||||
SHUFPD $0x1, X13, X13
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X10, X2
|
||||
MULPD X11, X4
|
||||
MULPD X12, X6
|
||||
MULPD X13, X8
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// psum += result[i]
|
||||
ADDPD X3, SUM
|
||||
ADDPD X5, P_SUM
|
||||
ADDPD X7, SUM
|
||||
ADDPD X9, P_SUM
|
||||
|
||||
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
|
||||
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
|
||||
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
ADDPD P_SUM, SUM // sum += psum
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do {
|
||||
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
|
||||
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDPD X3, SUM // sum += result[i]
|
||||
ADDQ INC_X, X_PTR // X_PTR += incX
|
||||
ADDQ INC_Y, Y_PTR // Y_PTR += incY
|
||||
DECQ TAIL
|
||||
JNZ dot_tail // } while --TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVUPS SUM, sum+88(FP)
|
||||
RET
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
|
||||
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
|
||||
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
|
||||
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
|
||||
|
||||
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
|
||||
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
|
||||
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
|
||||
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define IDX AX
|
||||
#define I_IDX DX
|
||||
#define NEG1 X15
|
||||
#define P_NEG1 X14
|
||||
|
||||
// func DotcUnitary(x, y []complex128) (sum complex128)
|
||||
TEXT ·DotcUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||
CMPQ y_len+32(FP), LEN
|
||||
CMOVQLE y_len+32(FP), LEN
|
||||
PXOR SUM, SUM // sum = 0
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dot_end
|
||||
XORPS P_SUM, P_SUM // psum = 0
|
||||
MOVSD $(-1.0), NEG1
|
||||
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
|
||||
XORQ IDX, IDX // i := 0
|
||||
MOVQ $1, I_IDX // j := 1
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $3, TAIL // TAIL = floor( TAIL / 4 )
|
||||
SHRQ $2, LEN // LEN = TAIL % 4
|
||||
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||
|
||||
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
|
||||
|
||||
dot_loop: // do {
|
||||
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_16_XPTR_IDX_8__X5
|
||||
MOVDDUP_32_XPTR_IDX_8__X7
|
||||
MOVDDUP_48_XPTR_IDX_8__X9
|
||||
|
||||
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MOVDDUP_16_XPTR_IIDX_8__X4
|
||||
MOVDDUP_32_XPTR_IIDX_8__X6
|
||||
MOVDDUP_48_XPTR_IIDX_8__X8
|
||||
|
||||
// X_i = { -imag(x[i]), -imag(x[i]) }
|
||||
MULPD NEG1, X2
|
||||
MULPD P_NEG1, X4
|
||||
MULPD NEG1, X6
|
||||
MULPD P_NEG1, X8
|
||||
|
||||
// X_j = { imag(y[i]), real(y[i]) }
|
||||
MOVUPS (Y_PTR)(IDX*8), X10
|
||||
MOVUPS 16(Y_PTR)(IDX*8), X11
|
||||
MOVUPS 32(Y_PTR)(IDX*8), X12
|
||||
MOVUPS 48(Y_PTR)(IDX*8), X13
|
||||
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X10, X3
|
||||
MULPD X11, X5
|
||||
MULPD X12, X7
|
||||
MULPD X13, X9
|
||||
|
||||
// X_j = { real(y[i]), imag(y[i]) }
|
||||
SHUFPD $0x1, X10, X10
|
||||
SHUFPD $0x1, X11, X11
|
||||
SHUFPD $0x1, X12, X12
|
||||
SHUFPD $0x1, X13, X13
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X10, X2
|
||||
MULPD X11, X4
|
||||
MULPD X12, X6
|
||||
MULPD X13, X8
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// psum += result[i]
|
||||
ADDPD X3, SUM
|
||||
ADDPD X5, P_SUM
|
||||
ADDPD X7, SUM
|
||||
ADDPD X9, P_SUM
|
||||
|
||||
ADDQ $8, IDX // IDX += 8
|
||||
ADDQ $8, I_IDX // I_IDX += 8
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
ADDPD P_SUM, SUM // sum += psum
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do {
|
||||
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]) , real(x[i]) }
|
||||
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
|
||||
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
|
||||
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDPD X3, SUM // SUM += result[i]
|
||||
ADDQ $2, IDX // IDX += 2
|
||||
ADDQ $2, I_IDX // I_IDX += 2
|
||||
DECQ TAIL
|
||||
JNZ dot_tail // } while --TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVUPS SUM, sum+48(FP)
|
||||
RET
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
|
||||
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
|
||||
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
|
||||
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
|
||||
|
||||
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
|
||||
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
|
||||
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
|
||||
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define INC_X R8
|
||||
#define INCx3_X R9
|
||||
#define INC_Y R10
|
||||
#define INCx3_Y R11
|
||||
|
||||
// func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||
TEXT ·DotuInc(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ n+48(FP), LEN // LEN = n
|
||||
PXOR SUM, SUM // sum = 0
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dot_end
|
||||
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
|
||||
SHLQ $4, INC_X
|
||||
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
|
||||
SHLQ $4, INC_Y
|
||||
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
|
||||
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
|
||||
MOVQ incX+56(FP), INC_X // INC_X = incX
|
||||
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
|
||||
MOVQ incY+64(FP), INC_Y // INC_Y = incY
|
||||
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $3, TAIL // LEN = LEN % 4
|
||||
SHRQ $2, LEN // LEN = floor( LEN / 4 )
|
||||
JZ dot_tail // if LEN <= 4 { goto dot_tail }
|
||||
PXOR P_SUM, P_SUM // psum = 0
|
||||
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
|
||||
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
|
||||
|
||||
dot_loop: // do {
|
||||
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_XPTR_INCX__X5
|
||||
MOVDDUP_XPTR_INCX_2__X7
|
||||
MOVDDUP_XPTR_INCx3X__X9
|
||||
|
||||
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MOVDDUP_8_XPTR_INCX__X4
|
||||
MOVDDUP_8_XPTR_INCX_2__X6
|
||||
MOVDDUP_8_XPTR_INCx3X__X8
|
||||
|
||||
// X_j = { imag(y[i]), real(y[i]) }
|
||||
MOVUPS (Y_PTR), X10
|
||||
MOVUPS (Y_PTR)(INC_Y*1), X11
|
||||
MOVUPS (Y_PTR)(INC_Y*2), X12
|
||||
MOVUPS (Y_PTR)(INCx3_Y*1), X13
|
||||
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X10, X3
|
||||
MULPD X11, X5
|
||||
MULPD X12, X7
|
||||
MULPD X13, X9
|
||||
|
||||
// X_j = { real(y[i]), imag(y[i]) }
|
||||
SHUFPD $0x1, X10, X10
|
||||
SHUFPD $0x1, X11, X11
|
||||
SHUFPD $0x1, X12, X12
|
||||
SHUFPD $0x1, X13, X13
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X10, X2
|
||||
MULPD X11, X4
|
||||
MULPD X12, X6
|
||||
MULPD X13, X8
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// psum += result[i]
|
||||
ADDPD X3, SUM
|
||||
ADDPD X5, P_SUM
|
||||
ADDPD X7, SUM
|
||||
ADDPD X9, P_SUM
|
||||
|
||||
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
|
||||
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
|
||||
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --BX > 0
|
||||
ADDPD P_SUM, SUM // sum += psum
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do {
|
||||
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDPD X3, SUM // sum += result[i]
|
||||
ADDQ INC_X, X_PTR // X_PTR += incX
|
||||
ADDQ INC_Y, Y_PTR // Y_PTR += incY
|
||||
DECQ TAIL // --TAIL
|
||||
JNZ dot_tail // } while TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVUPS SUM, sum+88(FP)
|
||||
RET
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
|
||||
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
|
||||
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
|
||||
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
|
||||
|
||||
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
|
||||
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
|
||||
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
|
||||
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
#define IDX AX
|
||||
#define I_IDX DX
|
||||
|
||||
// func DotuUnitary(x, y []complex128) (sum complex128)
|
||||
TEXT ·DotuUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||
CMPQ y_len+32(FP), LEN
|
||||
CMOVQLE y_len+32(FP), LEN
|
||||
PXOR SUM, SUM // SUM = 0
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dot_end
|
||||
PXOR P_SUM, P_SUM // P_SUM = 0
|
||||
XORQ IDX, IDX // IDX = 0
|
||||
MOVQ $1, DX // j = 1
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $3, TAIL // TAIL = floor( LEN / 4 )
|
||||
SHRQ $2, LEN // LEN = LEN % 4
|
||||
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||
|
||||
dot_loop: // do {
|
||||
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_16_XPTR_IDX_8__X5
|
||||
MOVDDUP_32_XPTR_IDX_8__X7
|
||||
MOVDDUP_48_XPTR_IDX_8__X9
|
||||
|
||||
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MOVDDUP_16_XPTR_IIDX_8__X4
|
||||
MOVDDUP_32_XPTR_IIDX_8__X6
|
||||
MOVDDUP_48_XPTR_IIDX_8__X8
|
||||
|
||||
// X_j = { imag(y[i]), real(y[i]) }
|
||||
MOVUPS (Y_PTR)(IDX*8), X10
|
||||
MOVUPS 16(Y_PTR)(IDX*8), X11
|
||||
MOVUPS 32(Y_PTR)(IDX*8), X12
|
||||
MOVUPS 48(Y_PTR)(IDX*8), X13
|
||||
|
||||
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
MULPD X10, X3
|
||||
MULPD X11, X5
|
||||
MULPD X12, X7
|
||||
MULPD X13, X9
|
||||
|
||||
// X_j = { real(y[i]), imag(y[i]) }
|
||||
SHUFPD $0x1, X10, X10
|
||||
SHUFPD $0x1, X11, X11
|
||||
SHUFPD $0x1, X12, X12
|
||||
SHUFPD $0x1, X13, X13
|
||||
|
||||
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
MULPD X10, X2
|
||||
MULPD X11, X4
|
||||
MULPD X12, X6
|
||||
MULPD X13, X8
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
// psum += result[i]
|
||||
ADDPD X3, SUM
|
||||
ADDPD X5, P_SUM
|
||||
ADDPD X7, SUM
|
||||
ADDPD X9, P_SUM
|
||||
|
||||
ADDQ $8, IDX // IDX += 8
|
||||
ADDQ $8, I_IDX // I_IDX += 8
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
ADDPD P_SUM, SUM // SUM += P_SUM
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do {
|
||||
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i] , real(x[i]) }
|
||||
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
|
||||
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDPD X3, SUM // psum += result[i]
|
||||
ADDQ $2, IDX // IDX += 2
|
||||
ADDQ $2, I_IDX // I_IDX += 2
|
||||
DECQ TAIL // --TAIL
|
||||
JNZ dot_tail // } while TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVUPS SUM, sum+48(FP)
|
||||
RET
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define INC R9
|
||||
#define INC3 R10
|
||||
#define ALPHA X0
|
||||
#define ALPHA_2 X1
|
||||
|
||||
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
|
||||
|
||||
// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
|
||||
TEXT ·DscalInc(SB), NOSPLIT, $0
|
||||
MOVQ x_base+8(FP), SRC // SRC = &x
|
||||
MOVQ n+32(FP), LEN // LEN = n
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dscal_end
|
||||
|
||||
MOVDDUP_ALPHA // ALPHA = alpha
|
||||
MOVQ inc+40(FP), INC // INC = inc
|
||||
SHLQ $4, INC // INC = INC * sizeof(complex128)
|
||||
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
|
||||
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA and ALPHA_2 for pipelining
|
||||
MOVQ LEN, TAIL // TAIL = LEN
|
||||
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
|
||||
|
||||
dscal_loop: // do {
|
||||
MOVUPS (SRC), X2 // X_i = x[i]
|
||||
MOVUPS (SRC)(INC*1), X3
|
||||
MOVUPS (SRC)(INC*2), X4
|
||||
MOVUPS (SRC)(INC3*1), X5
|
||||
|
||||
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||
MULPD ALPHA_2, X3
|
||||
MULPD ALPHA, X4
|
||||
MULPD ALPHA_2, X5
|
||||
|
||||
MOVUPS X2, (DST) // x[i] = X_i
|
||||
MOVUPS X3, (DST)(INC*1)
|
||||
MOVUPS X4, (DST)(INC*2)
|
||||
MOVUPS X5, (DST)(INC3*1)
|
||||
|
||||
LEAQ (SRC)(INC*4), SRC // SRC += INC*4
|
||||
DECQ LEN
|
||||
JNZ dscal_loop // } while --LEN > 0
|
||||
|
||||
dscal_tail:
|
||||
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||
JE dscal_end // if TAIL == 0 { return }
|
||||
|
||||
dscal_tail_loop: // do {
|
||||
MOVUPS (SRC), X2 // X_i = x[i]
|
||||
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||
MOVUPS X2, (DST) // x[i] = X_i
|
||||
ADDQ INC, SRC // SRC += INC
|
||||
DECQ TAIL
|
||||
JNZ dscal_tail_loop // } while --TAIL > 0
|
||||
|
||||
dscal_end:
|
||||
RET
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define IDX AX
|
||||
#define TAIL BX
|
||||
#define ALPHA X0
|
||||
#define ALPHA_2 X1
|
||||
|
||||
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
|
||||
|
||||
// func DscalUnitary(alpha float64, x []complex128)
|
||||
TEXT ·DscalUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_base+8(FP), SRC // SRC = &x
|
||||
MOVQ x_len+16(FP), LEN // LEN = len(x)
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE dscal_end
|
||||
|
||||
MOVDDUP_ALPHA // ALPHA = alpha
|
||||
XORQ IDX, IDX // IDX = 0
|
||||
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
|
||||
MOVQ LEN, TAIL // TAIL = LEN
|
||||
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
|
||||
|
||||
dscal_loop: // do {
|
||||
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
|
||||
MOVUPS 16(SRC)(IDX*8), X3
|
||||
MOVUPS 32(SRC)(IDX*8), X4
|
||||
MOVUPS 48(SRC)(IDX*8), X5
|
||||
|
||||
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||
MULPD ALPHA_2, X3
|
||||
MULPD ALPHA, X4
|
||||
MULPD ALPHA_2, X5
|
||||
|
||||
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
|
||||
MOVUPS X3, 16(DST)(IDX*8)
|
||||
MOVUPS X4, 32(DST)(IDX*8)
|
||||
MOVUPS X5, 48(DST)(IDX*8)
|
||||
|
||||
ADDQ $8, IDX // IDX += 8
|
||||
DECQ LEN
|
||||
JNZ dscal_loop // } while --LEN > 0
|
||||
|
||||
dscal_tail:
|
||||
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||
JZ dscal_end // if TAIL == 0 { return }
|
||||
|
||||
dscal_tail_loop: // do {
|
||||
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
|
||||
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
|
||||
ADDQ $2, IDX // IDX += 2
|
||||
DECQ TAIL
|
||||
JNZ dscal_tail_loop // } while --TAIL > 0
|
||||
|
||||
dscal_end:
|
||||
RET
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package c128
|
||||
|
||||
// ScalUnitaryTo is
|
||||
// for i, v := range x {
|
||||
// dst[i] = alpha * v
|
||||
// }
|
||||
func ScalUnitaryTo(dst []complex128, alpha complex128, x []complex128) {
|
||||
for i, v := range x {
|
||||
dst[i] = alpha * v
|
||||
}
|
||||
}
|
||||
|
||||
// ScalIncTo is
|
||||
// var idst, ix uintptr
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// dst[idst] = alpha * x[ix]
|
||||
// ix += incX
|
||||
// idst += incDst
|
||||
// }
|
||||
func ScalIncTo(dst []complex128, incDst uintptr, alpha complex128, x []complex128, n, incX uintptr) {
|
||||
var idst, ix uintptr
|
||||
for i := 0; i < int(n); i++ {
|
||||
dst[idst] = alpha * x[ix]
|
||||
ix += incX
|
||||
idst += incDst
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define IDX AX
|
||||
#define TAIL BX
|
||||
#define ALPHA X0
|
||||
#define ALPHA_C X1
|
||||
#define ALPHA2 X10
|
||||
#define ALPHA_C2 X11
|
||||
|
||||
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
|
||||
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
|
||||
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
|
||||
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
// func ScalUnitary(alpha complex128, x []complex128)
|
||||
TEXT ·ScalUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_base+16(FP), SRC // SRC = &x
|
||||
MOVQ x_len+24(FP), LEN // LEN = len(x)
|
||||
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||
JE scal_end
|
||||
|
||||
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
|
||||
MOVAPS ALPHA, ALPHA_C
|
||||
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
|
||||
|
||||
XORQ IDX, IDX // IDX = 0
|
||||
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
|
||||
MOVAPS ALPHA_C, ALPHA_C2
|
||||
MOVQ LEN, TAIL
|
||||
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||
JZ scal_tail // if BX == 0 { goto scal_tail }
|
||||
|
||||
scal_loop: // do {
|
||||
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS 16(SRC)(IDX*8), X4
|
||||
MOVUPS 32(SRC)(IDX*8), X6
|
||||
MOVUPS 48(SRC)(IDX*8), X8
|
||||
|
||||
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||
MULPD ALPHA_C, X2
|
||||
MULPD ALPHA, X3
|
||||
MULPD ALPHA_C2, X4
|
||||
MULPD ALPHA2, X5
|
||||
MULPD ALPHA_C, X6
|
||||
MULPD ALPHA, X7
|
||||
MULPD ALPHA_C2, X8
|
||||
MULPD ALPHA2, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
|
||||
MOVUPS X5, 16(DST)(IDX*8)
|
||||
MOVUPS X7, 32(DST)(IDX*8)
|
||||
MOVUPS X9, 48(DST)(IDX*8)
|
||||
ADDQ $8, IDX // IDX += 8
|
||||
DECQ LEN
|
||||
JNZ scal_loop // } while --LEN > 0
|
||||
|
||||
scal_tail:
|
||||
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||
JZ scal_end // if TAIL == 0 { return }
|
||||
|
||||
scal_tail_loop: // do {
|
||||
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
|
||||
ADDQ $2, IDX // IDX += 2
|
||||
DECQ TAIL
|
||||
JNZ scal_tail_loop // } while --LEN > 0
|
||||
|
||||
scal_end:
|
||||
RET
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SRC SI
|
||||
#define DST SI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define INC R9
|
||||
#define INC3 R10
|
||||
#define ALPHA X0
|
||||
#define ALPHA_C X1
|
||||
#define ALPHA2 X10
|
||||
#define ALPHA_C2 X11
|
||||
|
||||
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
|
||||
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
|
||||
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
|
||||
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
|
||||
|
||||
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||
|
||||
// func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
|
||||
TEXT ·ScalInc(SB), NOSPLIT, $0
|
||||
MOVQ x_base+16(FP), SRC // SRC = &x
|
||||
MOVQ n+40(FP), LEN // LEN = len(x)
|
||||
CMPQ LEN, $0
|
||||
JE scal_end // if LEN == 0 { return }
|
||||
|
||||
MOVQ inc+48(FP), INC // INC = inc
|
||||
SHLQ $4, INC // INC = INC * sizeof(complex128)
|
||||
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
|
||||
|
||||
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
|
||||
MOVAPS ALPHA, ALPHA_C
|
||||
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
|
||||
|
||||
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
|
||||
MOVAPS ALPHA_C, ALPHA_C2
|
||||
MOVQ LEN, TAIL
|
||||
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||
JZ scal_tail // if BX == 0 { goto scal_tail }
|
||||
|
||||
scal_loop: // do {
|
||||
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVUPS (SRC)(INC*1), X4
|
||||
MOVUPS (SRC)(INC*2), X6
|
||||
MOVUPS (SRC)(INC3*1), X8
|
||||
|
||||
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||
MOVDDUP_X2_X3
|
||||
MOVDDUP_X4_X5
|
||||
MOVDDUP_X6_X7
|
||||
MOVDDUP_X8_X9
|
||||
|
||||
// X_i = { imag(x[i]), imag(x[i]) }
|
||||
SHUFPD $0x3, X2, X2
|
||||
SHUFPD $0x3, X4, X4
|
||||
SHUFPD $0x3, X6, X6
|
||||
SHUFPD $0x3, X8, X8
|
||||
|
||||
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||
MULPD ALPHA_C, X2
|
||||
MULPD ALPHA, X3
|
||||
MULPD ALPHA_C2, X4
|
||||
MULPD ALPHA2, X5
|
||||
MULPD ALPHA_C, X6
|
||||
MULPD ALPHA, X7
|
||||
MULPD ALPHA_C2, X8
|
||||
MULPD ALPHA2, X9
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
ADDSUBPD_X4_X5
|
||||
ADDSUBPD_X6_X7
|
||||
ADDSUBPD_X8_X9
|
||||
|
||||
MOVUPS X3, (DST) // x[i] = X_(i+1)
|
||||
MOVUPS X5, (DST)(INC*1)
|
||||
MOVUPS X7, (DST)(INC*2)
|
||||
MOVUPS X9, (DST)(INC3*1)
|
||||
|
||||
LEAQ (SRC)(INC*4), SRC // SRC = &(SRC[inc*4])
|
||||
DECQ LEN
|
||||
JNZ scal_loop // } while --BX > 0
|
||||
|
||||
scal_tail:
|
||||
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||
JE scal_end // if TAIL == 0 { return }
|
||||
|
||||
scal_tail_loop: // do {
|
||||
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||
|
||||
// X_(i+1) = {
|
||||
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||
// }
|
||||
ADDSUBPD_X2_X3
|
||||
|
||||
MOVUPS X3, (DST) // x[i] = X_i
|
||||
ADDQ INC, SRC // SRC = &(SRC[incX])
|
||||
DECQ TAIL
|
||||
JNZ scal_tail_loop // } while --TAIL > 0
|
||||
|
||||
scal_end:
|
||||
RET
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
package c128
|
||||
|
||||
// AxpyUnitary is
|
||||
// for i, v := range x {
|
||||
// y[i] += alpha * v
|
||||
// }
|
||||
func AxpyUnitary(alpha complex128, x, y []complex128)
|
||||
|
||||
// AxpyUnitaryTo is
|
||||
// for i, v := range x {
|
||||
// dst[i] = alpha*v + y[i]
|
||||
// }
|
||||
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
|
||||
|
||||
// AxpyInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// y[iy] += alpha * x[ix]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
|
||||
// AxpyIncTo is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// dst[idst] = alpha*x[ix] + y[iy]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// idst += incDst
|
||||
// }
|
||||
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||
|
||||
// DscalUnitary is
|
||||
// for i, v := range x {
|
||||
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
|
||||
// }
|
||||
func DscalUnitary(alpha float64, x []complex128)
|
||||
|
||||
// DscalInc is
|
||||
// var ix uintptr
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
|
||||
// ix += inc
|
||||
// }
|
||||
func DscalInc(alpha float64, x []complex128, n, inc uintptr)
|
||||
|
||||
// ScalInc is
|
||||
// var ix uintptr
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// x[ix] *= alpha
|
||||
// ix += incX
|
||||
// }
|
||||
func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
|
||||
|
||||
// ScalUnitary is
|
||||
// for i := range x {
|
||||
// x[i] *= alpha
|
||||
// }
|
||||
func ScalUnitary(alpha complex128, x []complex128)
|
||||
|
||||
// DotcUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += y[i] * cmplx.Conj(v)
|
||||
// }
|
||||
// return sum
|
||||
func DotcUnitary(x, y []complex128) (sum complex128)
|
||||
|
||||
// DotcInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += y[iy] * cmplx.Conj(x[ix])
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return sum
|
||||
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||
|
||||
// DotuUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += y[i] * v
|
||||
// }
|
||||
// return sum
|
||||
func DotuUnitary(x, y []complex128) (sum complex128)
|
||||
|
||||
// DotuInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += y[iy] * x[ix]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return sum
|
||||
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !amd64 noasm appengine
|
||||
|
||||
package c128
|
||||
|
||||
import "math/cmplx"
|
||||
|
||||
// AxpyUnitary is
|
||||
// for i, v := range x {
|
||||
// y[i] += alpha * v
|
||||
// }
|
||||
func AxpyUnitary(alpha complex128, x, y []complex128) {
|
||||
for i, v := range x {
|
||||
y[i] += alpha * v
|
||||
}
|
||||
}
|
||||
|
||||
// AxpyUnitaryTo is
|
||||
// for i, v := range x {
|
||||
// dst[i] = alpha*v + y[i]
|
||||
// }
|
||||
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) {
|
||||
for i, v := range x {
|
||||
dst[i] = alpha*v + y[i]
|
||||
}
|
||||
}
|
||||
|
||||
// AxpyInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// y[iy] += alpha * x[ix]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
y[iy] += alpha * x[ix]
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
}
|
||||
|
||||
// AxpyIncTo is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// dst[idst] = alpha*x[ix] + y[iy]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// idst += incDst
|
||||
// }
|
||||
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
dst[idst] = alpha*x[ix] + y[iy]
|
||||
ix += incX
|
||||
iy += incY
|
||||
idst += incDst
|
||||
}
|
||||
}
|
||||
|
||||
// DscalUnitary is
|
||||
// for i, v := range x {
|
||||
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
|
||||
// }
|
||||
func DscalUnitary(alpha float64, x []complex128) {
|
||||
for i, v := range x {
|
||||
x[i] = complex(real(v)*alpha, imag(v)*alpha)
|
||||
}
|
||||
}
|
||||
|
||||
// DscalInc is
|
||||
// var ix uintptr
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
|
||||
// ix += inc
|
||||
// }
|
||||
func DscalInc(alpha float64, x []complex128, n, inc uintptr) {
|
||||
var ix uintptr
|
||||
for i := 0; i < int(n); i++ {
|
||||
x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
|
||||
ix += inc
|
||||
}
|
||||
}
|
||||
|
||||
// ScalInc is
|
||||
// var ix uintptr
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// x[ix] *= alpha
|
||||
// ix += incX
|
||||
// }
|
||||
func ScalInc(alpha complex128, x []complex128, n, inc uintptr) {
|
||||
var ix uintptr
|
||||
for i := 0; i < int(n); i++ {
|
||||
x[ix] *= alpha
|
||||
ix += inc
|
||||
}
|
||||
}
|
||||
|
||||
// ScalUnitary is
|
||||
// for i := range x {
|
||||
// x[i] *= alpha
|
||||
// }
|
||||
func ScalUnitary(alpha complex128, x []complex128) {
|
||||
for i := range x {
|
||||
x[i] *= alpha
|
||||
}
|
||||
}
|
||||
|
||||
// DotcUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += y[i] * cmplx.Conj(v)
|
||||
// }
|
||||
// return sum
|
||||
func DotcUnitary(x, y []complex128) (sum complex128) {
|
||||
for i, v := range x {
|
||||
sum += y[i] * cmplx.Conj(v)
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// DotcInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += y[iy] * cmplx.Conj(x[ix])
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return sum
|
||||
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
sum += y[iy] * cmplx.Conj(x[ix])
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// DotuUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += y[i] * v
|
||||
// }
|
||||
// return sum
|
||||
func DotuUnitary(x, y []complex128) (sum complex128) {
|
||||
for i, v := range x {
|
||||
sum += y[i] * v
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// DotuInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += y[iy] * x[ix]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return sum
|
||||
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
sum += y[iy] * x[ix]
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define INC_X R8
|
||||
#define INCx3_X R10
|
||||
#define INC_Y R9
|
||||
#define INCx3_Y R11
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
|
||||
// func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
|
||||
TEXT ·DdotInc(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ n+48(FP), LEN // LEN = n
|
||||
PXOR SUM, SUM // SUM = 0
|
||||
CMPQ LEN, $0
|
||||
JE dot_end
|
||||
|
||||
MOVQ ix+72(FP), INC_X // INC_X = ix
|
||||
MOVQ iy+80(FP), INC_Y // INC_Y = iy
|
||||
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
|
||||
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
|
||||
|
||||
MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(float32)
|
||||
SHLQ $2, INC_X
|
||||
MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(float32)
|
||||
SHLQ $2, INC_Y
|
||||
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $3, TAIL // TAIL = LEN % 4
|
||||
SHRQ $2, LEN // LEN = floor( LEN / 4 )
|
||||
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||
|
||||
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
|
||||
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
|
||||
|
||||
dot_loop: // Loop unrolled 4x do {
|
||||
CVTSS2SD (X_PTR), X2 // X_i = x[i:i+1]
|
||||
CVTSS2SD (X_PTR)(INC_X*1), X3
|
||||
CVTSS2SD (X_PTR)(INC_X*2), X4
|
||||
CVTSS2SD (X_PTR)(INCx3_X*1), X5
|
||||
|
||||
CVTSS2SD (Y_PTR), X6 // X_j = y[i:i+1]
|
||||
CVTSS2SD (Y_PTR)(INC_Y*1), X7
|
||||
CVTSS2SD (Y_PTR)(INC_Y*2), X8
|
||||
CVTSS2SD (Y_PTR)(INCx3_Y*1), X9
|
||||
|
||||
MULSD X6, X2 // X_i *= X_j
|
||||
MULSD X7, X3
|
||||
MULSD X8, X4
|
||||
MULSD X9, X5
|
||||
|
||||
ADDSD X2, SUM // SUM += X_i
|
||||
ADDSD X3, P_SUM
|
||||
ADDSD X4, SUM
|
||||
ADDSD X5, P_SUM
|
||||
|
||||
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
|
||||
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
|
||||
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
|
||||
ADDSD P_SUM, SUM // SUM += P_SUM
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do {
|
||||
CVTSS2SD (X_PTR), X2 // X2 = x[i]
|
||||
CVTSS2SD (Y_PTR), X3 // X2 *= y[i]
|
||||
MULSD X3, X2
|
||||
ADDSD X2, SUM // SUM += X2
|
||||
ADDQ INC_X, X_PTR // X_PTR += INC_X
|
||||
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
|
||||
DECQ TAIL
|
||||
JNZ dot_tail // } while --TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVSD SUM, sum+88(FP) // return SUM
|
||||
RET
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define HADDPD_SUM_SUM LONG $0xC07C0F66 // @ HADDPD X0, X0
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define IDX AX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
|
||||
// func DdotUnitary(x, y []float32) (sum float32)
|
||||
TEXT ·DdotUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||
CMPQ y_len+32(FP), LEN
|
||||
CMOVQLE y_len+32(FP), LEN
|
||||
PXOR SUM, SUM // psum = 0
|
||||
CMPQ LEN, $0
|
||||
JE dot_end
|
||||
|
||||
XORQ IDX, IDX
|
||||
MOVQ Y_PTR, DX
|
||||
ANDQ $0xF, DX // Align on 16-byte boundary for ADDPS
|
||||
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
|
||||
|
||||
SUBQ $16, DX
|
||||
|
||||
dot_align: // Trim first value(s) in unaligned buffer do {
|
||||
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
|
||||
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
|
||||
MULSD X3, X2
|
||||
ADDSD X2, SUM // SUM += X2
|
||||
INCQ IDX // IDX++
|
||||
DECQ LEN
|
||||
JZ dot_end // if --TAIL == 0 { return }
|
||||
ADDQ $4, DX
|
||||
JNZ dot_align // } while --LEN > 0
|
||||
|
||||
dot_no_trim:
|
||||
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $0x7, TAIL // TAIL = LEN % 8
|
||||
SHRQ $3, LEN // LEN = floor( LEN / 8 )
|
||||
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
|
||||
|
||||
dot_loop: // Loop unrolled 8x do {
|
||||
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||
CVTPS2PD 8(X_PTR)(IDX*4), X3
|
||||
CVTPS2PD 16(X_PTR)(IDX*4), X4
|
||||
CVTPS2PD 24(X_PTR)(IDX*4), X5
|
||||
|
||||
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
|
||||
CVTPS2PD 8(Y_PTR)(IDX*4), X7
|
||||
CVTPS2PD 16(Y_PTR)(IDX*4), X8
|
||||
CVTPS2PD 24(Y_PTR)(IDX*4), X9
|
||||
|
||||
MULPD X6, X2 // X_i *= X_j
|
||||
MULPD X7, X3
|
||||
MULPD X8, X4
|
||||
MULPD X9, X5
|
||||
|
||||
ADDPD X2, SUM // SUM += X_i
|
||||
ADDPD X3, P_SUM
|
||||
ADDPD X4, SUM
|
||||
ADDPD X5, P_SUM
|
||||
|
||||
ADDQ $8, IDX // IDX += 8
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
|
||||
ADDPD P_SUM, SUM // SUM += P_SUM
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail_start:
|
||||
MOVQ TAIL, LEN
|
||||
SHRQ $1, LEN
|
||||
JZ dot_tail_one
|
||||
|
||||
dot_tail_two:
|
||||
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
|
||||
MULPD X6, X2 // X_i *= X_j
|
||||
ADDPD X2, SUM // SUM += X_i
|
||||
ADDQ $2, IDX // IDX += 2
|
||||
DECQ LEN
|
||||
JNZ dot_tail_two // } while --LEN > 0
|
||||
|
||||
ANDQ $1, TAIL
|
||||
JZ dot_end
|
||||
|
||||
dot_tail_one:
|
||||
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
|
||||
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
|
||||
MULSD X3, X2 // X2 *= X3
|
||||
ADDSD X2, SUM // SUM += X2
|
||||
|
||||
dot_end:
|
||||
HADDPD_SUM_SUM // SUM = \sum{ SUM[i] }
|
||||
MOVSD SUM, sum+48(FP) // return SUM
|
||||
RET
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
// Copyright ©2017 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package f32 provides float32 vector primitives.
|
||||
package f32
|
||||
package f32 // import "gonum.org/v1/gonum/internal/asm/f32"
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define INC_X R8
|
||||
#define INCx3_X R10
|
||||
#define INC_Y R9
|
||||
#define INCx3_Y R11
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
|
||||
// func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
|
||||
TEXT ·DotInc(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
PXOR SUM, SUM // SUM = 0
|
||||
MOVQ n+48(FP), LEN // LEN = n
|
||||
CMPQ LEN, $0
|
||||
JE dot_end
|
||||
|
||||
MOVQ ix+72(FP), INC_X // INC_X = ix
|
||||
MOVQ iy+80(FP), INC_Y // INC_Y = iy
|
||||
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
|
||||
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
|
||||
|
||||
MOVQ incX+56(FP), INC_X // INC_X := incX * sizeof(float32)
|
||||
SHLQ $2, INC_X
|
||||
MOVQ incY+64(FP), INC_Y // INC_Y := incY * sizeof(float32)
|
||||
SHLQ $2, INC_Y
|
||||
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $0x3, TAIL // TAIL = LEN % 4
|
||||
SHRQ $2, LEN // LEN = floor( LEN / 4 )
|
||||
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||
|
||||
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
|
||||
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
|
||||
|
||||
dot_loop: // Loop unrolled 4x do {
|
||||
MOVSS (X_PTR), X2 // X_i = x[i:i+1]
|
||||
MOVSS (X_PTR)(INC_X*1), X3
|
||||
MOVSS (X_PTR)(INC_X*2), X4
|
||||
MOVSS (X_PTR)(INCx3_X*1), X5
|
||||
|
||||
MULSS (Y_PTR), X2 // X_i *= y[i:i+1]
|
||||
MULSS (Y_PTR)(INC_Y*1), X3
|
||||
MULSS (Y_PTR)(INC_Y*2), X4
|
||||
MULSS (Y_PTR)(INCx3_Y*1), X5
|
||||
|
||||
ADDSS X2, SUM // SUM += X_i
|
||||
ADDSS X3, P_SUM
|
||||
ADDSS X4, SUM
|
||||
ADDSS X5, P_SUM
|
||||
|
||||
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
|
||||
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
|
||||
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
|
||||
ADDSS P_SUM, SUM // P_SUM += SUM
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail: // do {
|
||||
MOVSS (X_PTR), X2 // X2 = x[i]
|
||||
MULSS (Y_PTR), X2 // X2 *= y[i]
|
||||
ADDSS X2, SUM // SUM += X2
|
||||
ADDQ INC_X, X_PTR // X_PTR += INC_X
|
||||
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
|
||||
DECQ TAIL
|
||||
JNZ dot_tail // } while --TAIL > 0
|
||||
|
||||
dot_end:
|
||||
MOVSS SUM, sum+88(FP) // return SUM
|
||||
RET
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//+build !noasm,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define HADDPS_SUM_SUM LONG $0xC07C0FF2 // @ HADDPS X0, X0
|
||||
|
||||
#define X_PTR SI
|
||||
#define Y_PTR DI
|
||||
#define LEN CX
|
||||
#define TAIL BX
|
||||
#define IDX AX
|
||||
#define SUM X0
|
||||
#define P_SUM X1
|
||||
|
||||
// func DotUnitary(x, y []float32) (sum float32)
|
||||
TEXT ·DotUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||
PXOR SUM, SUM // SUM = 0
|
||||
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||
CMPQ y_len+32(FP), LEN
|
||||
CMOVQLE y_len+32(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE dot_end
|
||||
|
||||
XORQ IDX, IDX
|
||||
MOVQ Y_PTR, DX
|
||||
ANDQ $0xF, DX // Align on 16-byte boundary for MULPS
|
||||
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
|
||||
SUBQ $16, DX
|
||||
|
||||
dot_align: // Trim first value(s) in unaligned buffer do {
|
||||
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
|
||||
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
|
||||
ADDSS X2, SUM // SUM += X2
|
||||
INCQ IDX // IDX++
|
||||
DECQ LEN
|
||||
JZ dot_end // if --TAIL == 0 { return }
|
||||
ADDQ $4, DX
|
||||
JNZ dot_align // } while --DX > 0
|
||||
|
||||
dot_no_trim:
|
||||
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||
MOVQ LEN, TAIL
|
||||
ANDQ $0xF, TAIL // TAIL = LEN % 16
|
||||
SHRQ $4, LEN // LEN = floor( LEN / 16 )
|
||||
JZ dot_tail4_start // if LEN == 0 { goto dot_tail4_start }
|
||||
|
||||
dot_loop: // Loop unrolled 16x do {
|
||||
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||
MOVUPS 16(X_PTR)(IDX*4), X3
|
||||
MOVUPS 32(X_PTR)(IDX*4), X4
|
||||
MOVUPS 48(X_PTR)(IDX*4), X5
|
||||
|
||||
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
|
||||
MULPS 16(Y_PTR)(IDX*4), X3
|
||||
MULPS 32(Y_PTR)(IDX*4), X4
|
||||
MULPS 48(Y_PTR)(IDX*4), X5
|
||||
|
||||
ADDPS X2, SUM // SUM += X_i
|
||||
ADDPS X3, P_SUM
|
||||
ADDPS X4, SUM
|
||||
ADDPS X5, P_SUM
|
||||
|
||||
ADDQ $16, IDX // IDX += 16
|
||||
DECQ LEN
|
||||
JNZ dot_loop // } while --LEN > 0
|
||||
|
||||
ADDPS P_SUM, SUM // SUM += P_SUM
|
||||
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||
JE dot_end
|
||||
|
||||
dot_tail4_start: // Reset loop counter for 4-wide tail loop
|
||||
MOVQ TAIL, LEN // LEN = floor( TAIL / 4 )
|
||||
SHRQ $2, LEN
|
||||
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
|
||||
|
||||
dot_tail4_loop: // Loop unrolled 4x do {
|
||||
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
|
||||
ADDPS X2, SUM // SUM += X_i
|
||||
ADDQ $4, IDX // i += 4
|
||||
DECQ LEN
|
||||
JNZ dot_tail4_loop // } while --LEN > 0
|
||||
|
||||
dot_tail_start: // Reset loop counter for 1-wide tail loop
|
||||
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||
JZ dot_end // if TAIL == 0 { return }
|
||||
|
||||
dot_tail: // do {
|
||||
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
|
||||
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
|
||||
ADDSS X2, SUM // psum += X2
|
||||
INCQ IDX // IDX++
|
||||
DECQ TAIL
|
||||
JNZ dot_tail // } while --TAIL > 0
|
||||
|
||||
dot_end:
|
||||
HADDPS_SUM_SUM // SUM = \sum{ SUM[i] }
|
||||
HADDPS_SUM_SUM
|
||||
MOVSS SUM, sum+48(FP) // return SUM
|
||||
RET
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -34,3 +34,35 @@ func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
|
|||
// idst += incDst
|
||||
// }
|
||||
func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
|
||||
|
||||
// DdotUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += float64(y[i]) * float64(v)
|
||||
// }
|
||||
// return
|
||||
func DdotUnitary(x, y []float32) (sum float64)
|
||||
|
||||
// DdotInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += float64(y[iy]) * float64(x[ix])
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return
|
||||
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
|
||||
|
||||
// DotUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += y[i] * v
|
||||
// }
|
||||
// return sum
|
||||
func DotUnitary(x, y []float32) (sum float32)
|
||||
|
||||
// DotInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += y[iy] * x[ix]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return sum
|
||||
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -55,3 +55,59 @@ func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float3
|
|||
idst += incDst
|
||||
}
|
||||
}
|
||||
|
||||
// DotUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += y[i] * v
|
||||
// }
|
||||
// return sum
|
||||
func DotUnitary(x, y []float32) (sum float32) {
|
||||
for i, v := range x {
|
||||
sum += y[i] * v
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// DotInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += y[iy] * x[ix]
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return sum
|
||||
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
sum += y[iy] * x[ix]
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// DdotUnitary is
|
||||
// for i, v := range x {
|
||||
// sum += float64(y[i]) * float64(v)
|
||||
// }
|
||||
// return
|
||||
func DdotUnitary(x, y []float32) (sum float64) {
|
||||
for i, v := range x {
|
||||
sum += float64(y[i]) * float64(v)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DdotInc is
|
||||
// for i := 0; i < int(n); i++ {
|
||||
// sum += float64(y[iy]) * float64(x[ix])
|
||||
// ix += incX
|
||||
// iy += incY
|
||||
// }
|
||||
// return
|
||||
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
|
||||
for i := 0; i < int(n); i++ {
|
||||
sum += float64(y[iy]) * float64(x[ix])
|
||||
ix += incX
|
||||
iy += incY
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
// Copyright ©2017 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package f64 provides float64 vector primitives.
|
||||
package f64
|
||||
package f64 // import "gonum.org/v1/gonum/internal/asm/f64"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue