mirror of https://github.com/etcd-io/dbtester.git
vendor: update import paths to "gonum.org/v1"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
This commit is contained in:
parent
f4955798bc
commit
e7294311a0
|
|
@ -99,70 +99,6 @@
|
||||||
revision = "1e59b77b52bf8e4b449a57e6f79f21226d571845"
|
revision = "1e59b77b52bf8e4b449a57e6f79f21226d571845"
|
||||||
source = "https://github.com/golang/protobuf"
|
source = "https://github.com/golang/protobuf"
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
branch = "master"
|
|
||||||
name = "github.com/gonum/blas"
|
|
||||||
packages = [
|
|
||||||
".",
|
|
||||||
"blas64",
|
|
||||||
"native",
|
|
||||||
"native/internal/math32"
|
|
||||||
]
|
|
||||||
revision = "37e82626499e1df7c54aeaba0959fd6e7e8dc1e4"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
branch = "master"
|
|
||||||
name = "github.com/gonum/floats"
|
|
||||||
packages = ["."]
|
|
||||||
revision = "f74b330d45c56584a6ea7a27f5c64ea2900631e9"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
branch = "master"
|
|
||||||
name = "github.com/gonum/internal"
|
|
||||||
packages = [
|
|
||||||
"asm/f32",
|
|
||||||
"asm/f64"
|
|
||||||
]
|
|
||||||
revision = "e57e4534cf9b3b00ef6c0175f59d8d2d34f60914"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
branch = "master"
|
|
||||||
name = "github.com/gonum/lapack"
|
|
||||||
packages = [
|
|
||||||
".",
|
|
||||||
"lapack64",
|
|
||||||
"native"
|
|
||||||
]
|
|
||||||
revision = "5ed4b826becd1807e09377508f51756586d1a98c"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
branch = "master"
|
|
||||||
name = "github.com/gonum/matrix"
|
|
||||||
packages = [
|
|
||||||
".",
|
|
||||||
"mat64"
|
|
||||||
]
|
|
||||||
revision = "dd6034299e4242c9f0ea36735e6d4264dfcb3f9f"
|
|
||||||
|
|
||||||
[[projects]]
|
|
||||||
name = "github.com/gonum/plot"
|
|
||||||
packages = [
|
|
||||||
".",
|
|
||||||
"palette",
|
|
||||||
"plotter",
|
|
||||||
"plotutil",
|
|
||||||
"tools/bezier",
|
|
||||||
"vg",
|
|
||||||
"vg/draw",
|
|
||||||
"vg/fonts",
|
|
||||||
"vg/vgeps",
|
|
||||||
"vg/vgimg",
|
|
||||||
"vg/vgpdf",
|
|
||||||
"vg/vgsvg"
|
|
||||||
]
|
|
||||||
revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
|
|
||||||
source = "https://github.com/gonum/plot"
|
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
name = "github.com/googleapis/gax-go"
|
name = "github.com/googleapis/gax-go"
|
||||||
packages = ["."]
|
packages = ["."]
|
||||||
|
|
@ -344,6 +280,44 @@
|
||||||
revision = "6dc17368e09b0e8634d71cac8168d853e869a0c7"
|
revision = "6dc17368e09b0e8634d71cac8168d853e869a0c7"
|
||||||
source = "https://github.com/golang/time"
|
source = "https://github.com/golang/time"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
name = "gonum.org/v1/gonum"
|
||||||
|
packages = [
|
||||||
|
"blas",
|
||||||
|
"blas/blas64",
|
||||||
|
"blas/gonum",
|
||||||
|
"floats",
|
||||||
|
"internal/asm/c128",
|
||||||
|
"internal/asm/f32",
|
||||||
|
"internal/asm/f64",
|
||||||
|
"internal/math32",
|
||||||
|
"lapack",
|
||||||
|
"lapack/gonum",
|
||||||
|
"lapack/lapack64",
|
||||||
|
"mat"
|
||||||
|
]
|
||||||
|
revision = "69fc04c7c31754cf196d3bcc70c3bc0eec9da1b7"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
name = "gonum.org/v1/plot"
|
||||||
|
packages = [
|
||||||
|
".",
|
||||||
|
"palette",
|
||||||
|
"plotter",
|
||||||
|
"plotutil",
|
||||||
|
"tools/bezier",
|
||||||
|
"vg",
|
||||||
|
"vg/draw",
|
||||||
|
"vg/fonts",
|
||||||
|
"vg/vgeps",
|
||||||
|
"vg/vgimg",
|
||||||
|
"vg/vgpdf",
|
||||||
|
"vg/vgsvg"
|
||||||
|
]
|
||||||
|
revision = "feab214a240f4312b98ab52baf662b55ff1ee377"
|
||||||
|
source = "https://github.com/gonum/plot"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
name = "google.golang.org/api"
|
name = "google.golang.org/api"
|
||||||
packages = [
|
packages = [
|
||||||
|
|
@ -421,6 +395,6 @@
|
||||||
[solve-meta]
|
[solve-meta]
|
||||||
analyzer-name = "dep"
|
analyzer-name = "dep"
|
||||||
analyzer-version = 1
|
analyzer-version = 1
|
||||||
inputs-digest = "057a961c21585211116e57f99b60e5bdd28cfe46411715ec4e13dbcf12ddd876"
|
inputs-digest = "21abf25cf507cffab0266c5836fb8acc3cb14f96b0f3fba49e0c55b85fedf5e5"
|
||||||
solver-name = "gps-cdcl"
|
solver-name = "gps-cdcl"
|
||||||
solver-version = 1
|
solver-version = 1
|
||||||
|
|
|
||||||
|
|
@ -88,9 +88,9 @@
|
||||||
|
|
||||||
|
|
||||||
[[constraint]]
|
[[constraint]]
|
||||||
name = "github.com/gonum/plot"
|
name = "gonum.org/v1/plot"
|
||||||
source = "https://github.com/gonum/plot"
|
source = "https://github.com/gonum/plot"
|
||||||
revision = "51b62dc5319d7fce41240d13e780a93e640b9a38"
|
revision = "feab214a240f4312b98ab52baf662b55ff1ee377"
|
||||||
|
|
||||||
|
|
||||||
[[constraint]]
|
[[constraint]]
|
||||||
|
|
|
||||||
|
|
@ -19,11 +19,11 @@ import (
|
||||||
|
|
||||||
"github.com/coreos/dbtester/dbtesterpb"
|
"github.com/coreos/dbtester/dbtesterpb"
|
||||||
|
|
||||||
"github.com/gonum/plot"
|
|
||||||
"github.com/gonum/plot/plotter"
|
|
||||||
"github.com/gonum/plot/plotutil"
|
|
||||||
"github.com/gonum/plot/vg"
|
|
||||||
"github.com/gyuho/dataframe"
|
"github.com/gyuho/dataframe"
|
||||||
|
"gonum.org/v1/plot"
|
||||||
|
"gonum.org/v1/plot/plotter"
|
||||||
|
"gonum.org/v1/plot/plotutil"
|
||||||
|
"gonum.org/v1/plot/vg"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ import (
|
||||||
"image/color"
|
"image/color"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/gonum/plot/plotutil"
|
"gonum.org/v1/plot/plotutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
// IsValidDatabaseID returns false if the database id is not supported.
|
// IsValidDatabaseID returns false if the database id is not supported.
|
||||||
|
|
|
||||||
|
|
@ -1,155 +0,0 @@
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package native
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
)
|
|
||||||
|
|
||||||
func newGeneral64(r, c int) general64 {
|
|
||||||
return general64{
|
|
||||||
data: make([]float64, r*c),
|
|
||||||
rows: r,
|
|
||||||
cols: c,
|
|
||||||
stride: c,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// general64 is a dense float64 matrix stored row by row in a flat slice.
// Element (i, j) lives at data[i*stride+j].
type general64 struct {
	// data is the backing storage; it may be shared with other matrices
	// (see view).
	data []float64
	// rows is the number of rows.
	rows int
	// cols is the number of columns.
	cols int
	// stride is the distance in elements between the starts of two
	// consecutive rows.
	stride int
}
|
|
||||||
|
|
||||||
// adds element-wise into receiver. rows and columns must match
|
|
||||||
func (g general64) add(h general64) {
|
|
||||||
if debug {
|
|
||||||
if g.rows != h.rows {
|
|
||||||
panic("blas: row size mismatch")
|
|
||||||
}
|
|
||||||
if g.cols != h.cols {
|
|
||||||
panic("blas: col size mismatch")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for i := 0; i < g.rows; i++ {
|
|
||||||
gtmp := g.data[i*g.stride : i*g.stride+g.cols]
|
|
||||||
for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
|
|
||||||
gtmp[j] += v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// at returns the value at the ith row and jth column. For speed reasons, the
|
|
||||||
// rows and columns are not bounds checked.
|
|
||||||
func (g general64) at(i, j int) float64 {
|
|
||||||
if debug {
|
|
||||||
if i < 0 || i >= g.rows {
|
|
||||||
panic("blas: row out of bounds")
|
|
||||||
}
|
|
||||||
if j < 0 || j >= g.cols {
|
|
||||||
panic("blas: col out of bounds")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return g.data[i*g.stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general64) check(c byte) error {
|
|
||||||
if g.rows < 0 {
|
|
||||||
return errors.New("blas: rows < 0")
|
|
||||||
}
|
|
||||||
if g.cols < 0 {
|
|
||||||
return errors.New("blas: cols < 0")
|
|
||||||
}
|
|
||||||
if g.stride < 1 {
|
|
||||||
return errors.New("blas: stride < 1")
|
|
||||||
}
|
|
||||||
if g.stride < g.cols {
|
|
||||||
return errors.New("blas: illegal stride")
|
|
||||||
}
|
|
||||||
if (g.rows-1)*g.stride+g.cols > len(g.data) {
|
|
||||||
return fmt.Errorf("blas: index of %c out of range", c)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general64) clone() general64 {
|
|
||||||
data := make([]float64, len(g.data))
|
|
||||||
copy(data, g.data)
|
|
||||||
return general64{
|
|
||||||
data: data,
|
|
||||||
rows: g.rows,
|
|
||||||
cols: g.cols,
|
|
||||||
stride: g.stride,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// assumes they are the same size
|
|
||||||
func (g general64) copy(h general64) {
|
|
||||||
if debug {
|
|
||||||
if g.rows != h.rows {
|
|
||||||
panic("blas: row mismatch")
|
|
||||||
}
|
|
||||||
if g.cols != h.cols {
|
|
||||||
panic("blas: col mismatch")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for k := 0; k < g.rows; k++ {
|
|
||||||
copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general64) equal(a general64) bool {
|
|
||||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i, v := range g.data {
|
|
||||||
if a.data[i] != v {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
// print is to aid debugging. Commented out to avoid fmt import
|
|
||||||
func (g general64) print() {
|
|
||||||
fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
|
|
||||||
for i := 0; i < g.rows; i++ {
|
|
||||||
fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
func (g general64) view(i, j, r, c int) general64 {
|
|
||||||
if debug {
|
|
||||||
if i < 0 || i+r > g.rows {
|
|
||||||
panic("blas: row out of bounds")
|
|
||||||
}
|
|
||||||
if j < 0 || j+c > g.cols {
|
|
||||||
panic("blas: col out of bounds")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return general64{
|
|
||||||
data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
|
|
||||||
rows: r,
|
|
||||||
cols: c,
|
|
||||||
stride: g.stride,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general64) equalWithinAbs(a general64, tol float64) bool {
|
|
||||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i, v := range g.data {
|
|
||||||
if math.Abs(a.data[i]-v) > tol {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
@ -1,157 +0,0 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
|
||||||
|
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package native
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
math "github.com/gonum/blas/native/internal/math32"
|
|
||||||
)
|
|
||||||
|
|
||||||
func newGeneral32(r, c int) general32 {
|
|
||||||
return general32{
|
|
||||||
data: make([]float32, r*c),
|
|
||||||
rows: r,
|
|
||||||
cols: c,
|
|
||||||
stride: c,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// general32 is a dense float32 matrix stored row by row in a flat slice.
// Element (i, j) lives at data[i*stride+j].
type general32 struct {
	// data is the backing storage; it may be shared with other matrices
	// (see view).
	data []float32
	// rows is the number of rows.
	rows int
	// cols is the number of columns.
	cols int
	// stride is the distance in elements between the starts of two
	// consecutive rows.
	stride int
}
|
|
||||||
|
|
||||||
// adds element-wise into receiver. rows and columns must match
|
|
||||||
func (g general32) add(h general32) {
|
|
||||||
if debug {
|
|
||||||
if g.rows != h.rows {
|
|
||||||
panic("blas: row size mismatch")
|
|
||||||
}
|
|
||||||
if g.cols != h.cols {
|
|
||||||
panic("blas: col size mismatch")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for i := 0; i < g.rows; i++ {
|
|
||||||
gtmp := g.data[i*g.stride : i*g.stride+g.cols]
|
|
||||||
for j, v := range h.data[i*h.stride : i*h.stride+h.cols] {
|
|
||||||
gtmp[j] += v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// at returns the value at the ith row and jth column. For speed reasons, the
|
|
||||||
// rows and columns are not bounds checked.
|
|
||||||
func (g general32) at(i, j int) float32 {
|
|
||||||
if debug {
|
|
||||||
if i < 0 || i >= g.rows {
|
|
||||||
panic("blas: row out of bounds")
|
|
||||||
}
|
|
||||||
if j < 0 || j >= g.cols {
|
|
||||||
panic("blas: col out of bounds")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return g.data[i*g.stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general32) check(c byte) error {
|
|
||||||
if g.rows < 0 {
|
|
||||||
return errors.New("blas: rows < 0")
|
|
||||||
}
|
|
||||||
if g.cols < 0 {
|
|
||||||
return errors.New("blas: cols < 0")
|
|
||||||
}
|
|
||||||
if g.stride < 1 {
|
|
||||||
return errors.New("blas: stride < 1")
|
|
||||||
}
|
|
||||||
if g.stride < g.cols {
|
|
||||||
return errors.New("blas: illegal stride")
|
|
||||||
}
|
|
||||||
if (g.rows-1)*g.stride+g.cols > len(g.data) {
|
|
||||||
return fmt.Errorf("blas: index of %c out of range", c)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general32) clone() general32 {
|
|
||||||
data := make([]float32, len(g.data))
|
|
||||||
copy(data, g.data)
|
|
||||||
return general32{
|
|
||||||
data: data,
|
|
||||||
rows: g.rows,
|
|
||||||
cols: g.cols,
|
|
||||||
stride: g.stride,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// assumes they are the same size
|
|
||||||
func (g general32) copy(h general32) {
|
|
||||||
if debug {
|
|
||||||
if g.rows != h.rows {
|
|
||||||
panic("blas: row mismatch")
|
|
||||||
}
|
|
||||||
if g.cols != h.cols {
|
|
||||||
panic("blas: col mismatch")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for k := 0; k < g.rows; k++ {
|
|
||||||
copy(g.data[k*g.stride:(k+1)*g.stride], h.data[k*h.stride:(k+1)*h.stride])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general32) equal(a general32) bool {
|
|
||||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i, v := range g.data {
|
|
||||||
if a.data[i] != v {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
// print is to aid debugging. Commented out to avoid fmt import
|
|
||||||
func (g general32) print() {
|
|
||||||
fmt.Println("r = ", g.rows, "c = ", g.cols, "stride: ", g.stride)
|
|
||||||
for i := 0; i < g.rows; i++ {
|
|
||||||
fmt.Println(g.data[i*g.stride : (i+1)*g.stride])
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
func (g general32) view(i, j, r, c int) general32 {
|
|
||||||
if debug {
|
|
||||||
if i < 0 || i+r > g.rows {
|
|
||||||
panic("blas: row out of bounds")
|
|
||||||
}
|
|
||||||
if j < 0 || j+c > g.cols {
|
|
||||||
panic("blas: col out of bounds")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return general32{
|
|
||||||
data: g.data[i*g.stride+j : (i+r-1)*g.stride+j+c],
|
|
||||||
rows: r,
|
|
||||||
cols: c,
|
|
||||||
stride: g.stride,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g general32) equalWithinAbs(a general32, tol float32) bool {
|
|
||||||
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i, v := range g.data {
|
|
||||||
if math.Abs(a.data[i]-v) > tol {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package f32
|
|
||||||
|
|
||||||
// DdotUnitary returns the dot product of x and y, converting every element
// to float64 before multiplying and accumulating the sum in float64.
// One term is added per element of x, so y must be at least as long as x;
// a shorter y makes the indexing panic.
func DdotUnitary(x, y []float32) (sum float64) {
	for i := 0; i < len(x); i++ {
		sum += float64(y[i]) * float64(x[i])
	}
	return sum
}
|
|
||||||
|
|
||||||
// DdotInc returns the sum of n products x[ix]*y[iy], where ix and iy start
// at the given offsets and advance by incX and incY after every term.
// Each element is converted to float64 before multiplying and the sum is
// accumulated in float64.
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
	count := int(n)
	for k := 0; k < count; k++ {
		sum += float64(y[iy]) * float64(x[ix])
		ix, iy = ix+incX, iy+incY
	}
	return sum
}
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package f32
|
|
||||||
|
|
||||||
// DotUnitary returns the dot product of x and y accumulated in float32.
// One term is added per element of x, so y must be at least as long as x;
// a shorter y makes the indexing panic.
func DotUnitary(x, y []float32) (sum float32) {
	for i := 0; i < len(x); i++ {
		sum += y[i] * x[i]
	}
	return sum
}
|
|
||||||
|
|
||||||
// DotInc returns the sum of n products x[ix]*y[iy] accumulated in float32,
// where ix and iy start at the given offsets and advance by incX and incY
// after every term.
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
	count := int(n)
	for k := 0; k < count; k++ {
		sum += y[iy] * x[ix]
		ix, iy = ix+incX, iy+incY
	}
	return sum
}
|
|
||||||
|
|
@ -1,103 +0,0 @@
|
||||||
// Generated by running
|
|
||||||
// go generate github.com/gonum/matrix
|
|
||||||
// DO NOT EDIT.
|
|
||||||
|
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// Package matrix provides common error handling mechanisms for matrix operations
|
|
||||||
// in mat64 and cmat128.
|
|
||||||
//
|
|
||||||
// Overview
|
|
||||||
//
|
|
||||||
// This section provides a quick overview of the matrix package. The following
|
|
||||||
// sections provide more in depth commentary.
|
|
||||||
//
|
|
||||||
// matrix provides:
|
|
||||||
// - Error type definitions
|
|
||||||
// - Error recovery mechanisms
|
|
||||||
// - Common constants used by mat64 and cmat128
|
|
||||||
//
|
|
||||||
// Errors
|
|
||||||
//
|
|
||||||
// The mat64 and cmat128 matrix packages share a common set of errors
|
|
||||||
// provided by matrix via the matrix.Error type.
|
|
||||||
//
|
|
||||||
// Errors are either returned directly or used as the parameter of a panic
|
|
||||||
// depending on the class of error encountered. Returned errors indicate
|
|
||||||
// that a call was not able to complete successfully while panics generally
|
|
||||||
// indicate a programmer or unrecoverable error.
|
|
||||||
//
|
|
||||||
// Examples of each type are found in the mat64 Solve methods, which find
|
|
||||||
// x such that A*x = b.
|
|
||||||
//
|
|
||||||
// An error value is returned from the function or method when the operation
|
|
||||||
// can meaningfully fail. The Solve operation cannot complete if A is
|
|
||||||
// singular. However, determining the singularity of A is most easily
|
|
||||||
// discovered during the Solve procedure itself and is a valid result from
|
|
||||||
// the operation, so in this case an error is returned.
|
|
||||||
//
|
|
||||||
// A function will panic when the input parameters are inappropriate for
|
|
||||||
// the function. In Solve, for example, the number of rows of each input
|
|
||||||
// matrix must be equal because of the rules of matrix multiplication.
|
|
||||||
// Similarly, for solving A*x = b, a non-zero receiver must have the same
|
|
||||||
// number of rows as A has columns and must have the same number of columns
|
|
||||||
// as b. In all cases where a function will panic, conditions that would
|
|
||||||
// lead to a panic can easily be checked prior to a call.
|
|
||||||
//
|
|
||||||
// Error Recovery
|
|
||||||
//
|
|
||||||
// When a matrix.Error is the parameter of a panic, the panic can be
|
|
||||||
// recovered by a Maybe function, which will then return the error.
|
|
||||||
// Panics that are not of type matrix.Error are re-panicked by the
|
|
||||||
// Maybe functions.
|
|
||||||
//
|
|
||||||
// Invariants
|
|
||||||
//
|
|
||||||
// Matrix input arguments to functions are never directly modified. If an operation
|
|
||||||
// changes Matrix data, the mutated matrix will be the receiver of a function.
|
|
||||||
//
|
|
||||||
// For convenience, a matrix may be used as both a receiver and as an input, e.g.
|
|
||||||
// a.Pow(a, 6)
|
|
||||||
// v.SolveVec(a.T(), v)
|
|
||||||
// though in many cases this will cause an allocation (see Element Aliasing).
|
|
||||||
// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
|
|
||||||
//
|
|
||||||
// Element Aliasing
|
|
||||||
//
|
|
||||||
// Most methods in the matrix packages modify receiver data. It is forbidden for the modified
|
|
||||||
// data region of the receiver to overlap the used data area of the input
|
|
||||||
// arguments. The exception to this rule is when the method receiver is equal to one
|
|
||||||
// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
|
|
||||||
//
|
|
||||||
// This prohibition is to help avoid subtle mistakes when the method needs to read
|
|
||||||
// from and write to the same data region. There are ways to make mistakes using the
|
|
||||||
// matrix API, and matrix functions will detect and complain about those.
|
|
||||||
// There are many ways to make mistakes by excursion from the matrix API via
|
|
||||||
// interaction with raw matrix values.
|
|
||||||
//
|
|
||||||
// If you need to read the rest of this section to understand the behavior of
|
|
||||||
// your program, you are being clever. Don't be clever. If you must be clever,
|
|
||||||
// blas64/cblas128 and lapack64/clapack128 may be used to call the behavior directly.
|
|
||||||
//
|
|
||||||
// The matrix packages will use the following rules to detect overlap between the receiver and one
|
|
||||||
// of the inputs:
|
|
||||||
// - the input implements one of the Raw methods, and
|
|
||||||
// - the Raw type matches that of the receiver or
|
|
||||||
// one is a RawMatrixer and the other is a RawVectorer, and
|
|
||||||
// - the address ranges of the backing data slices overlap, and
|
|
||||||
// - the strides differ or there is an overlap in the used data elements.
|
|
||||||
// If such an overlap is detected, the method will panic.
|
|
||||||
//
|
|
||||||
// The following cases will not panic:
|
|
||||||
// - the data slices do not overlap,
|
|
||||||
// - there is pointer identity between the receiver and input values after
|
|
||||||
// the value has been untransposed if necessary.
|
|
||||||
//
|
|
||||||
// The matrix packages will not attempt to detect element overlap if the input does not implement a
|
|
||||||
// Raw method, or if the Raw method differs from that of the receiver except when a
|
|
||||||
// conversion has occurred through a matrix API function. Method behavior is undefined
|
|
||||||
// if there is undetected overlap.
|
|
||||||
//
|
|
||||||
package matrix
|
|
||||||
|
|
@ -1,343 +0,0 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
//+build ignore
|
|
||||||
|
|
||||||
// gendoc creates the matrix, mat64 and cmat128 package doc comments.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"log"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"text/template"
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
var docs = template.Must(template.New("docs").Funcs(funcs).Parse(`{{define "common"}}// Generated by running
|
|
||||||
// go generate github.com/gonum/matrix
|
|
||||||
// DO NOT EDIT.
|
|
||||||
|
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// Package {{.Name}} provides {{.Provides}}
|
|
||||||
//
|
|
||||||
// Overview
|
|
||||||
//
|
|
||||||
// This section provides a quick overview of the {{.Name}} package. The following
|
|
||||||
// sections provide more in depth commentary.
|
|
||||||
//
|
|
||||||
{{.Overview}}
|
|
||||||
//{{end}}
|
|
||||||
{{define "interfaces"}}// The Matrix Interfaces
|
|
||||||
//
|
|
||||||
// The Matrix interface is the common link between the concrete types. The Matrix
|
|
||||||
// interface is defined by three functions: Dims, which returns the dimensions
|
|
||||||
// of the Matrix, At, which returns the element in the specified location, and
|
|
||||||
// T for returning a Transpose (discussed later). All of the concrete types can
|
|
||||||
// perform these behaviors and so implement the interface. Methods and functions
|
|
||||||
// are designed to use this interface, so in particular the method
|
|
||||||
// func (m *Dense) Mul(a, b Matrix)
|
|
||||||
// constructs a *Dense from the result of a multiplication with any Matrix types,
|
|
||||||
// not just *Dense. Where more restrictive requirements must be met, there are also the
|
|
||||||
// Symmetric and Triangular interfaces. For example, in
|
|
||||||
// func (s *SymDense) AddSym(a, b Symmetric)
|
|
||||||
// the Symmetric interface guarantees a symmetric result.
|
|
||||||
//
|
|
||||||
// Transposes
|
|
||||||
//
|
|
||||||
// The T method is used for transposition. For example, c.Mul(a.T(), b) computes
|
|
||||||
// c = a^T * b. The {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}} types implement this method using an implicit transpose —
|
|
||||||
// see the Transpose type for more details. Note that some operations have a
|
|
||||||
// transpose as part of their definition, as in *SymDense.SymOuterK.
|
|
||||||
//{{end}}
|
|
||||||
{{define "factorization"}}// Matrix Factorization
|
|
||||||
//
|
|
||||||
// Matrix factorizations, such as the LU decomposition, typically have their own
|
|
||||||
// specific data storage, and so are each implemented as a specific type. The
|
|
||||||
// factorization can be computed through a call to Factorize
|
|
||||||
// var lu {{if .ExamplePackage}}{{.ExamplePackage}}{{else}}{{.Name}}{{end}}.LU
|
|
||||||
// lu.Factorize(a)
|
|
||||||
// The elements of the factorization can be extracted through methods on the
|
|
||||||
// appropriate type, i.e. *TriDense.LFromLU and *TriDense.UFromLU. Alternatively,
|
|
||||||
// they can be used directly, as in *Dense.SolveLU. Some factorizations can be
|
|
||||||
// updated directly, without needing to update the original matrix and refactorize,
|
|
||||||
// as in *LU.RankOne.
|
|
||||||
//{{end}}
|
|
||||||
{{define "blas"}}// BLAS and LAPACK
|
|
||||||
//
|
|
||||||
// BLAS and LAPACK are the standard APIs for linear algebra routines. Many
|
|
||||||
// operations in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} are implemented using calls to the wrapper functions
|
|
||||||
// in gonum/blas/{{.BLAS|alts}} and gonum/lapack/{{.LAPACK|alts}}. By default, {{.BLAS|join "/"}} and
|
|
||||||
// {{.LAPACK|join "/"}} call the native Go implementations of the routines. Alternatively,
|
|
||||||
// it is possible to use C-based implementations of the APIs through the respective
|
|
||||||
// cgo packages and "Use" functions. The Go implementation of LAPACK makes calls
|
|
||||||
// through {{.BLAS|join "/"}}, so if a cgo BLAS implementation is registered, the {{.LAPACK|join "/"}}
|
|
||||||
// calls will be partially executed in Go and partially executed in C.
|
|
||||||
//{{end}}
|
|
||||||
{{define "switching"}}// Type Switching
|
|
||||||
//
|
|
||||||
// The Matrix abstraction enables efficiency as well as interoperability. Go's
|
|
||||||
// type reflection capabilities are used to choose the most efficient routine
|
|
||||||
// given the specific concrete types. For example, in
|
|
||||||
// c.Mul(a, b)
|
|
||||||
// if a and b both implement RawMatrixer, that is, they can be represented as a
|
|
||||||
// {{.BLAS|alts}}.General, {{.BLAS|alts}}.Gemm (general matrix multiplication) is called, while
|
|
||||||
// instead if b is a RawSymmetricer {{.BLAS|alts}}.Symm is used (general-symmetric
|
|
||||||
// multiplication), and if b is a *Vector {{.BLAS|alts}}.Gemv is used.
|
|
||||||
//
|
|
||||||
// There are many possible type combinations and special cases. No specific guarantees
|
|
||||||
// are made about the performance of any method, and in particular, note that an
|
|
||||||
// abstract matrix type may be copied into a concrete type of the corresponding
|
|
||||||
// value. If there are specific special cases that are needed, please submit a
|
|
||||||
// pull-request or file an issue.
|
|
||||||
//{{end}}
|
|
||||||
{{define "invariants"}}// Invariants
|
|
||||||
//
|
|
||||||
// Matrix input arguments to functions are never directly modified. If an operation
|
|
||||||
// changes Matrix data, the mutated matrix will be the receiver of a function.
|
|
||||||
//
|
|
||||||
// For convenience, a matrix may be used as both a receiver and as an input, e.g.
|
|
||||||
// a.Pow(a, 6)
|
|
||||||
// v.SolveVec(a.T(), v)
|
|
||||||
// though in many cases this will cause an allocation (see Element Aliasing).
|
|
||||||
// An exception to this rule is Copy, which does not allow a.Copy(a.T()).
|
|
||||||
//{{end}}
|
|
||||||
{{define "aliasing"}}// Element Aliasing
|
|
||||||
//
|
|
||||||
// Most methods in {{if .Description}}{{.Description}}{{else}}{{.Name}}{{end}} modify receiver data. It is forbidden for the modified
|
|
||||||
// data region of the receiver to overlap the used data area of the input
|
|
||||||
// arguments. The exception to this rule is when the method receiver is equal to one
|
|
||||||
// of the input arguments, as in the a.Pow(a, 6) call above, or its implicit transpose.
|
|
||||||
//
|
|
||||||
// This prohibition is to help avoid subtle mistakes when the method needs to read
|
|
||||||
// from and write to the same data region. There are ways to make mistakes using the
|
|
||||||
// {{.Name}} API, and {{.Name}} functions will detect and complain about those.
|
|
||||||
// There are many ways to make mistakes by excursion from the {{.Name}} API via
|
|
||||||
// interaction with raw matrix values.
|
|
||||||
//
|
|
||||||
// If you need to read the rest of this section to understand the behavior of
|
|
||||||
// your program, you are being clever. Don't be clever. If you must be clever,
|
|
||||||
// {{.BLAS|join "/"}} and {{.LAPACK|join "/"}} may be used to call the behavior directly.
|
|
||||||
//
|
|
||||||
// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will use the following rules to detect overlap between the receiver and one
|
|
||||||
// of the inputs:
|
|
||||||
// - the input implements one of the Raw methods, and
|
|
||||||
// - the Raw type matches that of the receiver or
|
|
||||||
// one is a RawMatrixer and the other is a RawVectorer, and
|
|
||||||
// - the address ranges of the backing data slices overlap, and
|
|
||||||
// - the strides differ or there is an overlap in the used data elements.
|
|
||||||
// If such an overlap is detected, the method will panic.
|
|
||||||
//
|
|
||||||
// The following cases will not panic:
|
|
||||||
// - the data slices do not overlap,
|
|
||||||
// - there is pointer identity between the receiver and input values after
|
|
||||||
// the value has been untransposed if necessary.
|
|
||||||
//
|
|
||||||
// {{if .Description}}{{.Description|sentence}}{{else}}{{.Name}}{{end}} will not attempt to detect element overlap if the input does not implement a
|
|
||||||
// Raw method, or if the Raw method differs from that of the receiver except when a
|
|
||||||
// conversion has occurred through a {{.Name}} API function. Method behavior is undefined
|
|
||||||
// if there is undetected overlap.
|
|
||||||
//{{end}}`))
|
|
||||||
|
|
||||||
// Package holds the per-package values that are fed to the documentation
// templates when a doc.go file is generated.
type Package struct {
	// path is the directory that receives the generated doc.go.
	path string

	// Name is the package name; it is emitted in the package clause and
	// referenced throughout the generated text.
	Name string

	// Provides is a short clause describing what the package offers.
	// NOTE(review): presumably consumed by the "common" template — confirm.
	Provides string

	// Description is an optional phrase used in place of Name when the
	// generated text refers to the package in prose.
	Description string

	// ExamplePackage names the package used in generated examples.
	// NOTE(review): usage is not visible in this part of the file — confirm.
	ExamplePackage string

	// Overview is a pre-formatted block of comment lines.
	Overview string

	// BLAS lists the BLAS packages named in the generated text.
	BLAS []string

	// LAPACK lists the LAPACK packages named in the generated text.
	LAPACK []string

	// template is the per-package template source parsed on top of the
	// shared documentation templates.
	template string
}
|
|
||||||
|
|
||||||
var pkgs = []Package{
|
|
||||||
{
|
|
||||||
path: ".",
|
|
||||||
|
|
||||||
Name: "matrix",
|
|
||||||
Description: "the matrix packages",
|
|
||||||
Provides: `common error handling mechanisms for matrix operations
|
|
||||||
// in mat64 and cmat128.`,
|
|
||||||
ExamplePackage: "mat64",
|
|
||||||
|
|
||||||
Overview: `// matrix provides:
|
|
||||||
// - Error type definitions
|
|
||||||
// - Error recovery mechanisms
|
|
||||||
// - Common constants used by mat64 and cmat128
|
|
||||||
//
|
|
||||||
// Errors
|
|
||||||
//
|
|
||||||
// The mat64 and cmat128 matrix packages share a common set of errors
|
|
||||||
// provided by matrix via the matrix.Error type.
|
|
||||||
//
|
|
||||||
// Errors are either returned directly or used as the parameter of a panic
|
|
||||||
// depending on the class of error encountered. Returned errors indicate
|
|
||||||
// that a call was not able to complete successfully while panics generally
|
|
||||||
// indicate a programmer or unrecoverable error.
|
|
||||||
//
|
|
||||||
// Examples of each type are found in the mat64 Solve methods, which find
|
|
||||||
// x such that A*x = b.
|
|
||||||
//
|
|
||||||
// An error value is returned from the function or method when the operation
|
|
||||||
// can meaningfully fail. The Solve operation cannot complete if A is
|
|
||||||
// singular. However, determining the singularity of A is most easily
|
|
||||||
// discovered during the Solve procedure itself and is a valid result from
|
|
||||||
// the operation, so in this case an error is returned.
|
|
||||||
//
|
|
||||||
// A function will panic when the input parameters are inappropriate for
|
|
||||||
// the function. In Solve, for example, the number of rows of each input
|
|
||||||
// matrix must be equal because of the rules of matrix multiplication.
|
|
||||||
// Similarly, for solving A*x = b, a non-zero receiver must have the same
|
|
||||||
// number of rows as A has columns and must have the same number of columns
|
|
||||||
// as b. In all cases where a function will panic, conditions that would
|
|
||||||
// lead to a panic can easily be checked prior to a call.
|
|
||||||
//
|
|
||||||
// Error Recovery
|
|
||||||
//
|
|
||||||
// When a matrix.Error is the parameter of a panic, the panic can be
|
|
||||||
// recovered by a Maybe function, which will then return the error.
|
|
||||||
// Panics that are not of type matrix.Error are re-panicked by the
|
|
||||||
// Maybe functions.`,
|
|
||||||
BLAS: []string{"blas64", "cblas128"},
|
|
||||||
LAPACK: []string{"lapack64", "clapack128"},
|
|
||||||
|
|
||||||
template: `{{template "common" .}}
|
|
||||||
{{template "invariants" .}}
|
|
||||||
{{template "aliasing" .}}
|
|
||||||
package {{.Name}}
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
path: "mat64",
|
|
||||||
|
|
||||||
Name: "mat64",
|
|
||||||
Provides: `implementations of float64 matrix structures and
|
|
||||||
// linear algebra operations on them.`,
|
|
||||||
|
|
||||||
Overview: `// mat64 provides:
|
|
||||||
// - Interfaces for Matrix classes (Matrix, Symmetric, Triangular)
|
|
||||||
// - Concrete implementations (Dense, SymDense, TriDense)
|
|
||||||
// - Methods and functions for using matrix data (Add, Trace, SymRankOne)
|
|
||||||
// - Types for constructing and using matrix factorizations (QR, LU)
|
|
||||||
//
|
|
||||||
// A matrix may be constructed through the corresponding New function. If no
|
|
||||||
// backing array is provided the matrix will be initialized to all zeros.
|
|
||||||
// // Allocate a zeroed matrix of size 3×5
|
|
||||||
// zero := mat64.NewDense(3, 5, nil)
|
|
||||||
// If a backing data slice is provided, the matrix will have those elements.
|
|
||||||
// Matrices are all stored in row-major format.
|
|
||||||
// // Generate a 6×6 matrix of random values.
|
|
||||||
// data := make([]float64, 36)
|
|
||||||
// for i := range data {
|
|
||||||
// data[i] = rand.NormFloat64()
|
|
||||||
// }
|
|
||||||
// a := mat64.NewDense(6, 6, data)
|
|
||||||
//
|
|
||||||
// Operations involving matrix data are implemented as functions when the values
|
|
||||||
// of the matrix remain unchanged
|
|
||||||
// tr := mat64.Trace(a)
|
|
||||||
// and are implemented as methods when the operation modifies the receiver.
|
|
||||||
// zero.Copy(a)
|
|
||||||
//
|
|
||||||
// Receivers must be the correct size for the matrix operations, otherwise the
|
|
||||||
// operation will panic. As a special case for convenience, a zero-sized matrix
|
|
||||||
// will be modified to have the correct size, allocating data if necessary.
|
|
||||||
// var c mat64.Dense // construct a new zero-sized matrix
|
|
||||||
// c.Mul(a, a) // c is automatically adjusted to be 6×6`,
|
|
||||||
|
|
||||||
BLAS: []string{"blas64"},
|
|
||||||
LAPACK: []string{"lapack64"},
|
|
||||||
|
|
||||||
template: `{{template "common" .}}
|
|
||||||
{{template "interfaces" .}}
|
|
||||||
{{template "factorization" .}}
|
|
||||||
{{template "blas" .}}
|
|
||||||
{{template "switching" .}}
|
|
||||||
{{template "invariants" .}}
|
|
||||||
{{template "aliasing" .}}
|
|
||||||
package {{.Name}}
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
path: "cmat128",
|
|
||||||
|
|
||||||
Name: "cmat128",
|
|
||||||
Provides: `implementations of complex128 matrix structures and
|
|
||||||
// linear algebra operations on them.`,
|
|
||||||
|
|
||||||
Overview: `// cmat128 provides:
|
|
||||||
// - Interfaces for a complex Matrix`,
|
|
||||||
|
|
||||||
BLAS: []string{"cblas128"},
|
|
||||||
LAPACK: []string{"clapack128"},
|
|
||||||
|
|
||||||
template: `{{template "common" . }}
|
|
||||||
{{template "blas" .}}
|
|
||||||
{{template "switching" .}}
|
|
||||||
{{template "invariants" .}}
|
|
||||||
{{template "aliasing" .}}
|
|
||||||
package {{.Name}}
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
var funcs = template.FuncMap{
|
|
||||||
"sentence": sentence,
|
|
||||||
"alts": alts,
|
|
||||||
"join": join,
|
|
||||||
}
|
|
||||||
|
|
||||||
// sentence converts s to sentence case, treating s as the prefix of a
// sentence: the first rune is upper-cased and the remainder is returned
// unchanged. An empty string is returned as is.
func sentence(s string) string {
	if s == "" {
		return ""
	}
	// Decode directly from the string; converting to []byte first would copy
	// the whole string just to measure its first rune.
	_, size := utf8.DecodeRuneInString(s)
	return strings.ToUpper(s[:size]) + s[size:]
}
|
|
||||||
|
|
||||||
// alts formats s as a glob alternatives list: nothing for an empty slice,
// the bare element for a single entry, and a brace-enclosed,
// comma-separated list otherwise.
func alts(s []string) string {
	if len(s) == 0 {
		return ""
	}
	if len(s) == 1 {
		return s[0]
	}
	joined := strings.Join(s, ",")
	return fmt.Sprintf("{%s}", joined)
}
|
|
||||||
|
|
||||||
// join concatenates the elements of s, placing sep between them. It is
// strings.Join with the arguments swapped so the separator comes first.
func join(sep string, s []string) string {
	joined := strings.Join(s, sep)
	return joined
}
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
for _, pkg := range pkgs {
|
|
||||||
t, err := template.Must(docs.Clone()).Parse(pkg.template)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("failed to parse template: %v", err)
|
|
||||||
}
|
|
||||||
file := filepath.Join(pkg.path, "doc.go")
|
|
||||||
f, err := os.Create(file)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("failed to create %q: %v", file, err)
|
|
||||||
}
|
|
||||||
err = t.Execute(f, pkg)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("failed to execute template: %v", err)
|
|
||||||
}
|
|
||||||
f.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,7 +0,0 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
//go:generate go run gendoc.go
|
|
||||||
|
|
||||||
package matrix
|
|
||||||
|
|
@ -1,145 +0,0 @@
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// This file must be kept in sync with index_no_bound_checks.go.
|
|
||||||
|
|
||||||
//+build bounds
|
|
||||||
|
|
||||||
package mat64
|
|
||||||
|
|
||||||
import "github.com/gonum/matrix"
|
|
||||||
|
|
||||||
// At returns the element at row i, column j.
|
|
||||||
func (m *Dense) At(i, j int) float64 {
|
|
||||||
return m.at(i, j)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Dense) at(i, j int) float64 {
|
|
||||||
if uint(i) >= uint(m.mat.Rows) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(m.mat.Cols) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
return m.mat.Data[i*m.mat.Stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set sets the element at row i, column j to the value v.
|
|
||||||
func (m *Dense) Set(i, j int, v float64) {
|
|
||||||
m.set(i, j, v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Dense) set(i, j int, v float64) {
|
|
||||||
if uint(i) >= uint(m.mat.Rows) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(m.mat.Cols) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
m.mat.Data[i*m.mat.Stride+j] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
// At returns the element at row i.
|
|
||||||
// It panics if i is out of bounds or if j is not zero.
|
|
||||||
func (v *Vector) At(i, j int) float64 {
|
|
||||||
if j != 0 {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
return v.at(i)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) at(i int) float64 {
|
|
||||||
if uint(i) >= uint(v.n) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
return v.mat.Data[i*v.mat.Inc]
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetVec sets the element at row i to the value val.
|
|
||||||
// It panics if i is out of bounds.
|
|
||||||
func (v *Vector) SetVec(i int, val float64) {
|
|
||||||
v.setVec(i, val)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) setVec(i int, val float64) {
|
|
||||||
if uint(i) >= uint(v.n) {
|
|
||||||
panic(matrix.ErrVectorAccess)
|
|
||||||
}
|
|
||||||
v.mat.Data[i*v.mat.Inc] = val
|
|
||||||
}
|
|
||||||
|
|
||||||
// At returns the element at row i and column j.
|
|
||||||
func (t *SymDense) At(i, j int) float64 {
|
|
||||||
return t.at(i, j)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *SymDense) at(i, j int) float64 {
|
|
||||||
if uint(i) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
if i > j {
|
|
||||||
i, j = j, i
|
|
||||||
}
|
|
||||||
return t.mat.Data[i*t.mat.Stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetSym sets the elements at (i,j) and (j,i) to the value v.
|
|
||||||
func (t *SymDense) SetSym(i, j int, v float64) {
|
|
||||||
t.set(i, j, v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *SymDense) set(i, j int, v float64) {
|
|
||||||
if uint(i) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
if i > j {
|
|
||||||
i, j = j, i
|
|
||||||
}
|
|
||||||
t.mat.Data[i*t.mat.Stride+j] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
// At returns the element at row i, column j.
|
|
||||||
func (t *TriDense) At(i, j int) float64 {
|
|
||||||
return t.at(i, j)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *TriDense) at(i, j int) float64 {
|
|
||||||
if uint(i) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
isUpper := t.isUpper()
|
|
||||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return t.mat.Data[i*t.mat.Stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetTri sets the element of the triangular matrix at row i, column j to the value v.
|
|
||||||
// It panics if the location is outside the appropriate half of the matrix.
|
|
||||||
func (t *TriDense) SetTri(i, j int, v float64) {
|
|
||||||
t.set(i, j, v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *TriDense) set(i, j int, v float64) {
|
|
||||||
if uint(i) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
isUpper := t.isUpper()
|
|
||||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
|
||||||
panic(matrix.ErrTriangleSet)
|
|
||||||
}
|
|
||||||
t.mat.Data[i*t.mat.Stride+j] = v
|
|
||||||
}
|
|
||||||
|
|
@ -1,145 +0,0 @@
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// This file must be kept in sync with index_bound_checks.go.
|
|
||||||
|
|
||||||
//+build !bounds
|
|
||||||
|
|
||||||
package mat64
|
|
||||||
|
|
||||||
import "github.com/gonum/matrix"
|
|
||||||
|
|
||||||
// At returns the element at row i, column j.
|
|
||||||
func (m *Dense) At(i, j int) float64 {
|
|
||||||
if uint(i) >= uint(m.mat.Rows) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(m.mat.Cols) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
return m.at(i, j)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Dense) at(i, j int) float64 {
|
|
||||||
return m.mat.Data[i*m.mat.Stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set sets the element at row i, column j to the value v.
|
|
||||||
func (m *Dense) Set(i, j int, v float64) {
|
|
||||||
if uint(i) >= uint(m.mat.Rows) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(m.mat.Cols) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
m.set(i, j, v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Dense) set(i, j int, v float64) {
|
|
||||||
m.mat.Data[i*m.mat.Stride+j] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
// At returns the element at row i.
|
|
||||||
// It panics if i is out of bounds or if j is not zero.
|
|
||||||
func (v *Vector) At(i, j int) float64 {
|
|
||||||
if uint(i) >= uint(v.n) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if j != 0 {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
return v.at(i)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) at(i int) float64 {
|
|
||||||
return v.mat.Data[i*v.mat.Inc]
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetVec sets the element at row i to the value val.
|
|
||||||
// It panics if i is out of bounds.
|
|
||||||
func (v *Vector) SetVec(i int, val float64) {
|
|
||||||
if uint(i) >= uint(v.n) {
|
|
||||||
panic(matrix.ErrVectorAccess)
|
|
||||||
}
|
|
||||||
v.setVec(i, val)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) setVec(i int, val float64) {
|
|
||||||
v.mat.Data[i*v.mat.Inc] = val
|
|
||||||
}
|
|
||||||
|
|
||||||
// At returns the element at row i and column j.
|
|
||||||
func (s *SymDense) At(i, j int) float64 {
|
|
||||||
if uint(i) >= uint(s.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(s.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
return s.at(i, j)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SymDense) at(i, j int) float64 {
|
|
||||||
if i > j {
|
|
||||||
i, j = j, i
|
|
||||||
}
|
|
||||||
return s.mat.Data[i*s.mat.Stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetSym sets the elements at (i,j) and (j,i) to the value v.
|
|
||||||
func (s *SymDense) SetSym(i, j int, v float64) {
|
|
||||||
if uint(i) >= uint(s.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(s.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
s.set(i, j, v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SymDense) set(i, j int, v float64) {
|
|
||||||
if i > j {
|
|
||||||
i, j = j, i
|
|
||||||
}
|
|
||||||
s.mat.Data[i*s.mat.Stride+j] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
// At returns the element at row i, column j.
|
|
||||||
func (t *TriDense) At(i, j int) float64 {
|
|
||||||
if uint(i) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
return t.at(i, j)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *TriDense) at(i, j int) float64 {
|
|
||||||
isUpper := t.triKind()
|
|
||||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return t.mat.Data[i*t.mat.Stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetTri sets the element at row i, column j to the value v.
|
|
||||||
// It panics if the location is outside the appropriate half of the matrix.
|
|
||||||
func (t *TriDense) SetTri(i, j int, v float64) {
|
|
||||||
if uint(i) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrRowAccess)
|
|
||||||
}
|
|
||||||
if uint(j) >= uint(t.mat.N) {
|
|
||||||
panic(matrix.ErrColAccess)
|
|
||||||
}
|
|
||||||
isUpper := t.isUpper()
|
|
||||||
if (isUpper && i > j) || (!isUpper && i < j) {
|
|
||||||
panic(matrix.ErrTriangleSet)
|
|
||||||
}
|
|
||||||
t.set(i, j, v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *TriDense) set(i, j int, v float64) {
|
|
||||||
t.mat.Data[i*t.mat.Stride+j] = v
|
|
||||||
}
|
|
||||||
|
|
@ -1,103 +0,0 @@
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package mat64
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/gonum/blas"
|
|
||||||
"github.com/gonum/internal/asm/f64"
|
|
||||||
"github.com/gonum/matrix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Inner computes the generalized inner product
|
|
||||||
// x^T A y
|
|
||||||
// between vectors x and y with matrix A. This is only a true inner product if
|
|
||||||
// A is symmetric positive definite, though the operation works for any matrix A.
|
|
||||||
//
|
|
||||||
// Inner panics if x.Len != m or y.Len != n when A is an m x n matrix.
|
|
||||||
func Inner(x *Vector, A Matrix, y *Vector) float64 {
|
|
||||||
m, n := A.Dims()
|
|
||||||
if x.Len() != m {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
if y.Len() != n {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
if m == 0 || n == 0 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
var sum float64
|
|
||||||
|
|
||||||
switch b := A.(type) {
|
|
||||||
case RawSymmetricer:
|
|
||||||
bmat := b.RawSymmetric()
|
|
||||||
if bmat.Uplo != blas.Upper {
|
|
||||||
// Panic as a string not a mat64.Error.
|
|
||||||
panic(badSymTriangle)
|
|
||||||
}
|
|
||||||
for i := 0; i < x.Len(); i++ {
|
|
||||||
xi := x.at(i)
|
|
||||||
if xi != 0 {
|
|
||||||
if y.mat.Inc == 1 {
|
|
||||||
sum += xi * f64.DotUnitary(
|
|
||||||
bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
|
|
||||||
y.mat.Data[i:],
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
sum += xi * f64.DotInc(
|
|
||||||
bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n],
|
|
||||||
y.mat.Data[i*y.mat.Inc:], uintptr(n-i),
|
|
||||||
1, uintptr(y.mat.Inc),
|
|
||||||
0, 0,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
yi := y.at(i)
|
|
||||||
if i != n-1 && yi != 0 {
|
|
||||||
if x.mat.Inc == 1 {
|
|
||||||
sum += yi * f64.DotUnitary(
|
|
||||||
bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
|
|
||||||
x.mat.Data[i+1:],
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
sum += yi * f64.DotInc(
|
|
||||||
bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n],
|
|
||||||
x.mat.Data[(i+1)*x.mat.Inc:], uintptr(n-i-1),
|
|
||||||
1, uintptr(x.mat.Inc),
|
|
||||||
0, 0,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case RawMatrixer:
|
|
||||||
bmat := b.RawMatrix()
|
|
||||||
for i := 0; i < x.Len(); i++ {
|
|
||||||
xi := x.at(i)
|
|
||||||
if xi != 0 {
|
|
||||||
if y.mat.Inc == 1 {
|
|
||||||
sum += xi * f64.DotUnitary(
|
|
||||||
bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
|
|
||||||
y.mat.Data,
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
sum += xi * f64.DotInc(
|
|
||||||
bmat.Data[i*bmat.Stride:i*bmat.Stride+n],
|
|
||||||
y.mat.Data, uintptr(n),
|
|
||||||
1, uintptr(y.mat.Inc),
|
|
||||||
0, 0,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
for i := 0; i < x.Len(); i++ {
|
|
||||||
xi := x.at(i)
|
|
||||||
for j := 0; j < y.Len(); j++ {
|
|
||||||
sum += xi * A.At(i, j) * y.at(j)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sum
|
|
||||||
}
|
|
||||||
|
|
@ -1,217 +0,0 @@
|
||||||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package mat64
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
|
||||||
"github.com/gonum/blas/blas64"
|
|
||||||
"github.com/gonum/lapack/lapack64"
|
|
||||||
"github.com/gonum/matrix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// LQ is a type for creating and using the LQ factorization of a matrix.
|
|
||||||
type LQ struct {
|
|
||||||
lq *Dense
|
|
||||||
tau []float64
|
|
||||||
cond float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func (lq *LQ) updateCond() {
|
|
||||||
// A = LQ, where Q is orthonormal. Orthonormal multiplications do not change
|
|
||||||
// the condition number. Thus, ||A|| = ||L|| ||Q|| = ||Q||.
|
|
||||||
m := lq.lq.mat.Rows
|
|
||||||
work := make([]float64, 3*m)
|
|
||||||
iwork := make([]int, m)
|
|
||||||
l := lq.lq.asTriDense(m, blas.NonUnit, blas.Lower)
|
|
||||||
v := lapack64.Trcon(matrix.CondNorm, l.mat, work, iwork)
|
|
||||||
lq.cond = 1 / v
|
|
||||||
}
|
|
||||||
|
|
||||||
// Factorize computes the LQ factorization of an m×n matrix a where n <= m. The LQ
|
|
||||||
// factorization always exists even if A is singular.
|
|
||||||
//
|
|
||||||
// The LQ decomposition is a factorization of the matrix A such that A = L * Q.
|
|
||||||
// The matrix Q is an orthonormal n×n matrix, and L is an m×n upper triangular matrix.
|
|
||||||
// L and Q can be extracted from the LFromLQ and QFromLQ methods on Dense.
|
|
||||||
func (lq *LQ) Factorize(a Matrix) {
|
|
||||||
m, n := a.Dims()
|
|
||||||
if m > n {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
k := min(m, n)
|
|
||||||
if lq.lq == nil {
|
|
||||||
lq.lq = &Dense{}
|
|
||||||
}
|
|
||||||
lq.lq.Clone(a)
|
|
||||||
work := make([]float64, 1)
|
|
||||||
lq.tau = make([]float64, k)
|
|
||||||
lapack64.Gelqf(lq.lq.mat, lq.tau, work, -1)
|
|
||||||
work = make([]float64, int(work[0]))
|
|
||||||
lapack64.Gelqf(lq.lq.mat, lq.tau, work, len(work))
|
|
||||||
lq.updateCond()
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(btracey): Add in the "Reduced" forms for extracting the m×m orthogonal
// and lower triangular matrices.
|
|
||||||
|
|
||||||
// LFromLQ extracts the m×n lower trapezoidal matrix from a LQ decomposition.
|
|
||||||
func (m *Dense) LFromLQ(lq *LQ) {
|
|
||||||
r, c := lq.lq.Dims()
|
|
||||||
m.reuseAs(r, c)
|
|
||||||
|
|
||||||
// Disguise the LQ as a lower triangular
|
|
||||||
t := &TriDense{
|
|
||||||
mat: blas64.Triangular{
|
|
||||||
N: r,
|
|
||||||
Stride: lq.lq.mat.Stride,
|
|
||||||
Data: lq.lq.mat.Data,
|
|
||||||
Uplo: blas.Lower,
|
|
||||||
Diag: blas.NonUnit,
|
|
||||||
},
|
|
||||||
cap: lq.lq.capCols,
|
|
||||||
}
|
|
||||||
m.Copy(t)
|
|
||||||
|
|
||||||
if r == c {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Zero right of the triangular.
|
|
||||||
for i := 0; i < r; i++ {
|
|
||||||
zero(m.mat.Data[i*m.mat.Stride+r : i*m.mat.Stride+c])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// QFromLQ extracts the n×n orthonormal matrix Q from an LQ decomposition.
|
|
||||||
func (m *Dense) QFromLQ(lq *LQ) {
|
|
||||||
r, c := lq.lq.Dims()
|
|
||||||
m.reuseAs(c, c)
|
|
||||||
|
|
||||||
// Set Q = I.
|
|
||||||
for i := 0; i < c; i++ {
|
|
||||||
v := m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]
|
|
||||||
zero(v)
|
|
||||||
v[i] = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// Construct Q from the elementary reflectors.
|
|
||||||
h := blas64.General{
|
|
||||||
Rows: c,
|
|
||||||
Cols: c,
|
|
||||||
Stride: c,
|
|
||||||
Data: make([]float64, c*c),
|
|
||||||
}
|
|
||||||
qCopy := getWorkspace(c, c, false)
|
|
||||||
v := blas64.Vector{
|
|
||||||
Inc: 1,
|
|
||||||
Data: make([]float64, c),
|
|
||||||
}
|
|
||||||
for i := 0; i < r; i++ {
|
|
||||||
// Set h = I.
|
|
||||||
zero(h.Data)
|
|
||||||
for j := 0; j < len(h.Data); j += c + 1 {
|
|
||||||
h.Data[j] = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the vector data as the elementary reflector.
|
|
||||||
for j := 0; j < i; j++ {
|
|
||||||
v.Data[j] = 0
|
|
||||||
}
|
|
||||||
v.Data[i] = 1
|
|
||||||
for j := i + 1; j < c; j++ {
|
|
||||||
v.Data[j] = lq.lq.mat.Data[i*lq.lq.mat.Stride+j]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute the multiplication matrix.
|
|
||||||
blas64.Ger(-lq.tau[i], v, v, h)
|
|
||||||
qCopy.Copy(m)
|
|
||||||
blas64.Gemm(blas.NoTrans, blas.NoTrans,
|
|
||||||
1, h, qCopy.mat,
|
|
||||||
0, m.mat)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SolveLQ finds a minimum-norm solution to a system of linear equations defined
|
|
||||||
// by the matrices A and b, where A is an m×n matrix represented in its LQ factorized
|
|
||||||
// form. If A is singular or near-singular a Condition error is returned. Please
|
|
||||||
// see the documentation for Condition for more information.
|
|
||||||
//
|
|
||||||
// The minimization problem solved depends on the input parameters.
|
|
||||||
// If trans == false, find the minimum norm solution of A * X = b.
|
|
||||||
// If trans == true, find X such that ||A*X - b||_2 is minimized.
|
|
||||||
// The solution matrix, X, is stored in place into the receiver.
|
|
||||||
func (m *Dense) SolveLQ(lq *LQ, trans bool, b Matrix) error {
|
|
||||||
r, c := lq.lq.Dims()
|
|
||||||
br, bc := b.Dims()
|
|
||||||
|
|
||||||
// The LQ solve algorithm stores the result in-place into the right hand side.
|
|
||||||
// The storage for the answer must be large enough to hold both b and x.
|
|
||||||
// However, this method's receiver must be the size of x. Copy b, and then
|
|
||||||
// copy the result into m at the end.
|
|
||||||
if trans {
|
|
||||||
if c != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
m.reuseAs(r, bc)
|
|
||||||
} else {
|
|
||||||
if r != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
m.reuseAs(c, bc)
|
|
||||||
}
|
|
||||||
// Do not need to worry about overlap between m and b because x has its own
|
|
||||||
// independent storage.
|
|
||||||
x := getWorkspace(max(r, c), bc, false)
|
|
||||||
x.Copy(b)
|
|
||||||
t := lq.lq.asTriDense(lq.lq.mat.Rows, blas.NonUnit, blas.Lower).mat
|
|
||||||
if trans {
|
|
||||||
work := make([]float64, 1)
|
|
||||||
lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, -1)
|
|
||||||
work = make([]float64, int(work[0]))
|
|
||||||
lapack64.Ormlq(blas.Left, blas.NoTrans, lq.lq.mat, lq.tau, x.mat, work, len(work))
|
|
||||||
|
|
||||||
ok := lapack64.Trtrs(blas.Trans, t, x.mat)
|
|
||||||
if !ok {
|
|
||||||
return matrix.Condition(math.Inf(1))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
|
|
||||||
if !ok {
|
|
||||||
return matrix.Condition(math.Inf(1))
|
|
||||||
}
|
|
||||||
for i := r; i < c; i++ {
|
|
||||||
zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
|
|
||||||
}
|
|
||||||
work := make([]float64, 1)
|
|
||||||
lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, -1)
|
|
||||||
work = make([]float64, int(work[0]))
|
|
||||||
lapack64.Ormlq(blas.Left, blas.Trans, lq.lq.mat, lq.tau, x.mat, work, len(work))
|
|
||||||
}
|
|
||||||
// M was set above to be the correct size for the result.
|
|
||||||
m.Copy(x)
|
|
||||||
putWorkspace(x)
|
|
||||||
if lq.cond > matrix.ConditionTolerance {
|
|
||||||
return matrix.Condition(lq.cond)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// SolveLQVec finds a minimum-norm solution to a system of linear equations.
|
|
||||||
// Please see Dense.SolveLQ for the full documentation.
|
|
||||||
func (v *Vector) SolveLQVec(lq *LQ, trans bool, b *Vector) error {
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
r, c := lq.lq.Dims()
|
|
||||||
// The Solve implementation is non-trivial, so rather than duplicate the code,
|
|
||||||
// instead recast the Vectors as Dense and call the matrix code.
|
|
||||||
if trans {
|
|
||||||
v.reuseAs(r)
|
|
||||||
} else {
|
|
||||||
v.reuseAs(c)
|
|
||||||
}
|
|
||||||
return v.asDense().SolveLQ(lq, trans, b.asDense())
|
|
||||||
}
|
|
||||||
|
|
@ -1,183 +0,0 @@
|
||||||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package mat64
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
|
||||||
"github.com/gonum/blas/blas64"
|
|
||||||
"github.com/gonum/lapack/lapack64"
|
|
||||||
"github.com/gonum/matrix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// QR is a type for creating and using the QR factorization of a matrix.
|
|
||||||
type QR struct {
|
|
||||||
qr *Dense
|
|
||||||
tau []float64
|
|
||||||
cond float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func (qr *QR) updateCond() {
|
|
||||||
// A = QR, where Q is orthonormal. Orthonormal multiplications do not change
|
|
||||||
// the condition number. Thus, ||A|| = ||Q|| ||R|| = ||R||.
|
|
||||||
n := qr.qr.mat.Cols
|
|
||||||
work := make([]float64, 3*n)
|
|
||||||
iwork := make([]int, n)
|
|
||||||
r := qr.qr.asTriDense(n, blas.NonUnit, blas.Upper)
|
|
||||||
v := lapack64.Trcon(matrix.CondNorm, r.mat, work, iwork)
|
|
||||||
qr.cond = 1 / v
|
|
||||||
}
|
|
||||||
|
|
||||||
// Factorize computes the QR factorization of an m×n matrix a where m >= n. The QR
|
|
||||||
// factorization always exists even if A is singular.
|
|
||||||
//
|
|
||||||
// The QR decomposition is a factorization of the matrix A such that A = Q * R.
|
|
||||||
// The matrix Q is an orthonormal m×m matrix, and R is an m×n upper triangular matrix.
|
|
||||||
// Q and R can be extracted from the QFromQR and RFromQR methods on Dense.
|
|
||||||
func (qr *QR) Factorize(a Matrix) {
|
|
||||||
m, n := a.Dims()
|
|
||||||
if m < n {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
k := min(m, n)
|
|
||||||
if qr.qr == nil {
|
|
||||||
qr.qr = &Dense{}
|
|
||||||
}
|
|
||||||
qr.qr.Clone(a)
|
|
||||||
work := make([]float64, 1)
|
|
||||||
qr.tau = make([]float64, k)
|
|
||||||
lapack64.Geqrf(qr.qr.mat, qr.tau, work, -1)
|
|
||||||
|
|
||||||
work = make([]float64, int(work[0]))
|
|
||||||
lapack64.Geqrf(qr.qr.mat, qr.tau, work, len(work))
|
|
||||||
qr.updateCond()
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(btracey): Add in the "Reduced" forms for extracting the n×n orthogonal
|
|
||||||
// and upper triangular matrices.
|
|
||||||
|
|
||||||
// RFromQR extracts the m×n upper trapezoidal matrix from a QR decomposition.
|
|
||||||
func (m *Dense) RFromQR(qr *QR) {
|
|
||||||
r, c := qr.qr.Dims()
|
|
||||||
m.reuseAs(r, c)
|
|
||||||
|
|
||||||
// Disguise the QR as an upper triangular
|
|
||||||
t := &TriDense{
|
|
||||||
mat: blas64.Triangular{
|
|
||||||
N: c,
|
|
||||||
Stride: qr.qr.mat.Stride,
|
|
||||||
Data: qr.qr.mat.Data,
|
|
||||||
Uplo: blas.Upper,
|
|
||||||
Diag: blas.NonUnit,
|
|
||||||
},
|
|
||||||
cap: qr.qr.capCols,
|
|
||||||
}
|
|
||||||
m.Copy(t)
|
|
||||||
|
|
||||||
// Zero below the triangular.
|
|
||||||
for i := r; i < c; i++ {
|
|
||||||
zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// QFromQR extracts the m×m orthonormal matrix Q from a QR decomposition.
|
|
||||||
func (m *Dense) QFromQR(qr *QR) {
|
|
||||||
r, _ := qr.qr.Dims()
|
|
||||||
m.reuseAsZeroed(r, r)
|
|
||||||
|
|
||||||
// Set Q = I.
|
|
||||||
for i := 0; i < r*r; i += r + 1 {
|
|
||||||
m.mat.Data[i] = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// Construct Q from the elementary reflectors.
|
|
||||||
work := make([]float64, 1)
|
|
||||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, -1)
|
|
||||||
work = make([]float64, int(work[0]))
|
|
||||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, m.mat, work, len(work))
|
|
||||||
}
|
|
||||||
|
|
||||||
// SolveQR finds a minimum-norm solution to a system of linear equations defined
|
|
||||||
// by the matrices A and b, where A is an m×n matrix represented in its QR factorized
|
|
||||||
// form. If A is singular or near-singular a Condition error is returned. Please
|
|
||||||
// see the documentation for Condition for more information.
|
|
||||||
//
|
|
||||||
// The minimization problem solved depends on the input parameters.
|
|
||||||
// If trans == false, find X such that ||A*X - b||_2 is minimized.
|
|
||||||
// If trans == true, find the minimum norm solution of A^T * X = b.
|
|
||||||
// The solution matrix, X, is stored in place into the receiver.
|
|
||||||
func (m *Dense) SolveQR(qr *QR, trans bool, b Matrix) error {
|
|
||||||
r, c := qr.qr.Dims()
|
|
||||||
br, bc := b.Dims()
|
|
||||||
|
|
||||||
// The QR solve algorithm stores the result in-place into the right hand side.
|
|
||||||
// The storage for the answer must be large enough to hold both b and x.
|
|
||||||
// However, this method's receiver must be the size of x. Copy b, and then
|
|
||||||
// copy the result into m at the end.
|
|
||||||
if trans {
|
|
||||||
if c != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
m.reuseAs(r, bc)
|
|
||||||
} else {
|
|
||||||
if r != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
m.reuseAs(c, bc)
|
|
||||||
}
|
|
||||||
// Do not need to worry about overlap between m and b because x has its own
|
|
||||||
// independent storage.
|
|
||||||
x := getWorkspace(max(r, c), bc, false)
|
|
||||||
x.Copy(b)
|
|
||||||
t := qr.qr.asTriDense(qr.qr.mat.Cols, blas.NonUnit, blas.Upper).mat
|
|
||||||
if trans {
|
|
||||||
ok := lapack64.Trtrs(blas.Trans, t, x.mat)
|
|
||||||
if !ok {
|
|
||||||
return matrix.Condition(math.Inf(1))
|
|
||||||
}
|
|
||||||
for i := c; i < r; i++ {
|
|
||||||
zero(x.mat.Data[i*x.mat.Stride : i*x.mat.Stride+bc])
|
|
||||||
}
|
|
||||||
work := make([]float64, 1)
|
|
||||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, -1)
|
|
||||||
work = make([]float64, int(work[0]))
|
|
||||||
lapack64.Ormqr(blas.Left, blas.NoTrans, qr.qr.mat, qr.tau, x.mat, work, len(work))
|
|
||||||
} else {
|
|
||||||
work := make([]float64, 1)
|
|
||||||
lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, -1)
|
|
||||||
work = make([]float64, int(work[0]))
|
|
||||||
lapack64.Ormqr(blas.Left, blas.Trans, qr.qr.mat, qr.tau, x.mat, work, len(work))
|
|
||||||
|
|
||||||
ok := lapack64.Trtrs(blas.NoTrans, t, x.mat)
|
|
||||||
if !ok {
|
|
||||||
return matrix.Condition(math.Inf(1))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// M was set above to be the correct size for the result.
|
|
||||||
m.Copy(x)
|
|
||||||
putWorkspace(x)
|
|
||||||
if qr.cond > matrix.ConditionTolerance {
|
|
||||||
return matrix.Condition(qr.cond)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// SolveQRVec finds a minimum-norm solution to a system of linear equations.
|
|
||||||
// Please see Dense.SolveQR for the full documentation.
|
|
||||||
func (v *Vector) SolveQRVec(qr *QR, trans bool, b *Vector) error {
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
r, c := qr.qr.Dims()
|
|
||||||
// The Solve implementation is non-trivial, so rather than duplicate the code,
|
|
||||||
// instead recast the Vectors as Dense and call the matrix code.
|
|
||||||
if trans {
|
|
||||||
v.reuseAs(r)
|
|
||||||
} else {
|
|
||||||
v.reuseAs(c)
|
|
||||||
}
|
|
||||||
return v.asDense().SolveQR(qr, trans, b.asDense())
|
|
||||||
}
|
|
||||||
|
|
@ -1,281 +0,0 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package mat64
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/gonum/blas"
|
|
||||||
"github.com/gonum/blas/blas64"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Panic strings used by the checkOverlap methods.

// regionOverlap is the panic string for the general case of a matrix region
// overlap between a source and a destination.
const regionOverlap = "mat64: bad region: overlap"

// regionIdentity is the panic string for the specific case of complete
// agreement between a source and a destination.
const regionIdentity = "mat64: bad region: identical"

// mismatchedStrides is the panic string for overlapping data slices that
// have differing strides.
const mismatchedStrides = "mat64: bad region: different strides"
|
|
||||||
|
|
||||||
// checkOverlap returns false if the receiver does not overlap data elements
|
|
||||||
// referenced by the parameter and panics otherwise.
|
|
||||||
//
|
|
||||||
// checkOverlap methods return a boolean to allow the check call to be added to a
|
|
||||||
// boolean expression, making use of short-circuit operators.
|
|
||||||
|
|
||||||
func (m *Dense) checkOverlap(a blas64.General) bool {
|
|
||||||
mat := m.RawMatrix()
|
|
||||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
off := offset(mat.Data[:1], a.Data[:1])
|
|
||||||
|
|
||||||
if off == 0 {
|
|
||||||
// At least one element overlaps.
|
|
||||||
if mat.Cols == a.Cols && mat.Rows == a.Rows && mat.Stride == a.Stride {
|
|
||||||
panic(regionIdentity)
|
|
||||||
}
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
|
|
||||||
if off > 0 && len(mat.Data) <= off {
|
|
||||||
// We know m is completely before a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if off < 0 && len(a.Data) <= -off {
|
|
||||||
// We know m is completely after a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if mat.Stride != a.Stride {
|
|
||||||
// Too hard, so assume the worst.
|
|
||||||
panic(mismatchedStrides)
|
|
||||||
}
|
|
||||||
|
|
||||||
if off < 0 {
|
|
||||||
off = -off
|
|
||||||
mat.Cols, a.Cols = a.Cols, mat.Cols
|
|
||||||
}
|
|
||||||
if rectanglesOverlap(off, mat.Cols, a.Cols, mat.Stride) {
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SymDense) checkOverlap(a blas64.Symmetric) bool {
|
|
||||||
mat := s.RawSymmetric()
|
|
||||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
off := offset(mat.Data[:1], a.Data[:1])
|
|
||||||
|
|
||||||
if off == 0 {
|
|
||||||
// At least one element overlaps.
|
|
||||||
if mat.N == a.N && mat.Stride == a.Stride {
|
|
||||||
panic(regionIdentity)
|
|
||||||
}
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
|
|
||||||
if off > 0 && len(mat.Data) <= off {
|
|
||||||
// We know s is completely before a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if off < 0 && len(a.Data) <= -off {
|
|
||||||
// We know s is completely after a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if mat.Stride != a.Stride {
|
|
||||||
// Too hard, so assume the worst.
|
|
||||||
panic(mismatchedStrides)
|
|
||||||
}
|
|
||||||
|
|
||||||
if off < 0 {
|
|
||||||
off = -off
|
|
||||||
mat.N, a.N = a.N, mat.N
|
|
||||||
// If we created the matrix it will always
|
|
||||||
// be in the upper triangle, but don't trust
|
|
||||||
// that this is the case.
|
|
||||||
mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
|
|
||||||
}
|
|
||||||
if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *TriDense) checkOverlap(a blas64.Triangular) bool {
|
|
||||||
mat := t.RawTriangular()
|
|
||||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
off := offset(mat.Data[:1], a.Data[:1])
|
|
||||||
|
|
||||||
if off == 0 {
|
|
||||||
// At least one element overlaps.
|
|
||||||
if mat.N == a.N && mat.Stride == a.Stride {
|
|
||||||
panic(regionIdentity)
|
|
||||||
}
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
|
|
||||||
if off > 0 && len(mat.Data) <= off {
|
|
||||||
// We know t is completely before a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if off < 0 && len(a.Data) <= -off {
|
|
||||||
// We know t is completely after a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if mat.Stride != a.Stride {
|
|
||||||
// Too hard, so assume the worst.
|
|
||||||
panic(mismatchedStrides)
|
|
||||||
}
|
|
||||||
|
|
||||||
if off < 0 {
|
|
||||||
off = -off
|
|
||||||
mat.N, a.N = a.N, mat.N
|
|
||||||
mat.Uplo, a.Uplo = a.Uplo, mat.Uplo
|
|
||||||
}
|
|
||||||
if trianglesOverlap(off, mat.N, a.N, mat.Stride, mat.Uplo == blas.Upper, a.Uplo == blas.Upper) {
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) checkOverlap(a blas64.Vector) bool {
|
|
||||||
mat := v.mat
|
|
||||||
if cap(mat.Data) == 0 || cap(a.Data) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
off := offset(mat.Data[:1], a.Data[:1])
|
|
||||||
|
|
||||||
if off == 0 {
|
|
||||||
// At least one element overlaps.
|
|
||||||
if mat.Inc == a.Inc && len(mat.Data) == len(a.Data) {
|
|
||||||
panic(regionIdentity)
|
|
||||||
}
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
|
|
||||||
if off > 0 && len(mat.Data) <= off {
|
|
||||||
// We know v is completely before a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if off < 0 && len(a.Data) <= -off {
|
|
||||||
// We know v is completely after a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if mat.Inc != a.Inc {
|
|
||||||
// Too hard, so assume the worst.
|
|
||||||
panic(mismatchedStrides)
|
|
||||||
}
|
|
||||||
|
|
||||||
if mat.Inc == 1 || off&mat.Inc == 0 {
|
|
||||||
panic(regionOverlap)
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// rectanglesOverlap reports whether the strided rectangles a and b overlap
// when b is offset by off elements after a but has at least one element before
// the end of a. off must be positive. a and b have aCols and bCols columns
// respectively and share the given stride.
//
// Both matrices are shifted left so that the left column of a sits at 0. The
// column range of b is then flattened by taking its shifted left and right
// positions modulo the common stride, which makes the column ranges directly
// comparable when the backing data slices are known to overlap.
func rectanglesOverlap(off, aCols, bCols, stride int) bool {
	if stride == 1 {
		// Unit stride means overlapping data slices must overlap as
		// matrices.
		return true
	}

	// a occupies columns [0, aCols); b occupies [bLeft, bRight) modulo the
	// stride.
	bLeft := off % stride
	bRight := (bLeft + bCols) % stride

	// b strictly wraps past the end of a row when its right edge lands, after
	// the modulo, at or before its left edge without sitting exactly on the
	// row boundary. A wrapped b always overlaps a.
	wrapped := bRight != 0 && bLeft >= bRight

	// Otherwise b is not wrapped and overlaps a only if it starts inside the
	// column range of a.
	return wrapped || bLeft < aCols
}
|
|
||||||
|
|
||||||
// trianglesOverlap returns whether the strided triangles a and b overlap
|
|
||||||
// when b is offset by off elements after a but has at least one element before
|
|
||||||
// the end of a. off must be positive. a and b are aSize×aSize and bSize×bSize
|
|
||||||
// respectively.
|
|
||||||
func trianglesOverlap(off, aSize, bSize, stride int, aUpper, bUpper bool) bool {
|
|
||||||
if !rectanglesOverlap(off, aSize, bSize, stride) {
|
|
||||||
// Fast return if bounding rectangles do not overlap.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find location of b relative to a.
|
|
||||||
rowOffset := off / stride
|
|
||||||
colOffset := off % stride
|
|
||||||
if (off+bSize)%stride < colOffset {
|
|
||||||
// We have wrapped, so readjust offsets.
|
|
||||||
rowOffset++
|
|
||||||
colOffset -= stride
|
|
||||||
}
|
|
||||||
|
|
||||||
if aUpper {
|
|
||||||
// Check whether the upper left of b
|
|
||||||
// is in the triangle of a
|
|
||||||
if rowOffset >= 0 && rowOffset <= colOffset {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// Check whether the upper right of b
|
|
||||||
// is in the triangle of a.
|
|
||||||
return bUpper && rowOffset < colOffset+bSize
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check whether the upper left of b
|
|
||||||
// is in the triangle of a
|
|
||||||
if colOffset >= 0 && rowOffset >= colOffset {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if bUpper {
|
|
||||||
// Check whether the upper right corner of b
|
|
||||||
// is in a or the upper row of b spans a row
|
|
||||||
// of a.
|
|
||||||
return rowOffset > colOffset+bSize || colOffset < 0
|
|
||||||
}
|
|
||||||
if colOffset < 0 {
|
|
||||||
// Check whether the lower left of a
|
|
||||||
// is in the triangle of b or below
|
|
||||||
// the diagonal of a. This requires a
|
|
||||||
// swap of reference origin.
|
|
||||||
return -rowOffset+aSize > -colOffset
|
|
||||||
}
|
|
||||||
// Check whether the lower left of b
|
|
||||||
// is in the triangle of a or below
|
|
||||||
// the diagonal of a.
|
|
||||||
return rowOffset+bSize > colOffset
|
|
||||||
}
|
|
||||||
|
|
@ -1,468 +0,0 @@
|
||||||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package mat64
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/gonum/blas"
|
|
||||||
"github.com/gonum/blas/blas64"
|
|
||||||
"github.com/gonum/internal/asm/f64"
|
|
||||||
"github.com/gonum/matrix"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
vector *Vector
|
|
||||||
|
|
||||||
_ Matrix = vector
|
|
||||||
|
|
||||||
_ Reseter = vector
|
|
||||||
)
|
|
||||||
|
|
||||||
// Vector represents a column vector.
|
|
||||||
type Vector struct {
|
|
||||||
mat blas64.Vector
|
|
||||||
n int
|
|
||||||
// A BLAS vector can have a negative increment, but allowing this
|
|
||||||
// in the mat64 type complicates a lot of code, and doesn't gain anything.
|
|
||||||
// Vector must have positive increment in this package.
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewVector creates a new Vector of length n. If data == nil,
|
|
||||||
// a new slice is allocated for the backing slice. If len(data) == n, data is
|
|
||||||
// used as the backing slice, and changes to the elements of the returned Vector
|
|
||||||
// will be reflected in data. If neither of these is true, NewVector will panic.
|
|
||||||
func NewVector(n int, data []float64) *Vector {
|
|
||||||
if len(data) != n && data != nil {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
if data == nil {
|
|
||||||
data = make([]float64, n)
|
|
||||||
}
|
|
||||||
return &Vector{
|
|
||||||
mat: blas64.Vector{
|
|
||||||
Inc: 1,
|
|
||||||
Data: data,
|
|
||||||
},
|
|
||||||
n: n,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ViewVec returns a sub-vector view of the receiver starting at element i and
|
|
||||||
// extending n rows. If i is out of range, n is zero, or the view extends
|
|
||||||
// beyond the bounds of the Vector, ViewVec will panic with ErrIndexOutOfRange.
|
|
||||||
// The returned Vector retains reference to the underlying vector.
|
|
||||||
//
|
|
||||||
// ViewVec is deprecated and should not be used. It will be removed at a later date.
|
|
||||||
func (v *Vector) ViewVec(i, n int) *Vector {
|
|
||||||
return v.SliceVec(i, i+n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// SliceVec returns a new Vector that shares backing data with the receiver.
|
|
||||||
// The returned matrix starts at i of the recevier and extends k-i elements.
|
|
||||||
// SliceVec panics with ErrIndexOutOfRange if the slice is outside the bounds
|
|
||||||
// of the receiver.
|
|
||||||
func (v *Vector) SliceVec(i, k int) *Vector {
|
|
||||||
if i < 0 || k <= i || v.n < k {
|
|
||||||
panic(matrix.ErrIndexOutOfRange)
|
|
||||||
}
|
|
||||||
return &Vector{
|
|
||||||
n: k - i,
|
|
||||||
mat: blas64.Vector{
|
|
||||||
Inc: v.mat.Inc,
|
|
||||||
Data: v.mat.Data[i*v.mat.Inc : (k-1)*v.mat.Inc+1],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) Dims() (r, c int) {
|
|
||||||
if v.isZero() {
|
|
||||||
return 0, 0
|
|
||||||
}
|
|
||||||
return v.n, 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// Len returns the length of the vector.
|
|
||||||
func (v *Vector) Len() int {
|
|
||||||
return v.n
|
|
||||||
}
|
|
||||||
|
|
||||||
// T performs an implicit transpose by returning the receiver inside a Transpose.
|
|
||||||
func (v *Vector) T() Matrix {
|
|
||||||
return Transpose{v}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset zeros the length of the vector so that it can be reused as the
|
|
||||||
// receiver of a dimensionally restricted operation.
|
|
||||||
//
|
|
||||||
// See the Reseter interface for more information.
|
|
||||||
func (v *Vector) Reset() {
|
|
||||||
// No change of Inc or n to 0 may be
|
|
||||||
// made unless both are set to 0.
|
|
||||||
v.mat.Inc = 0
|
|
||||||
v.n = 0
|
|
||||||
v.mat.Data = v.mat.Data[:0]
|
|
||||||
}
|
|
||||||
|
|
||||||
// CloneVec makes a copy of a into the receiver, overwriting the previous value
|
|
||||||
// of the receiver.
|
|
||||||
func (v *Vector) CloneVec(a *Vector) {
|
|
||||||
if v == a {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
v.n = a.n
|
|
||||||
v.mat = blas64.Vector{
|
|
||||||
Inc: 1,
|
|
||||||
Data: use(v.mat.Data, v.n),
|
|
||||||
}
|
|
||||||
blas64.Copy(v.n, a.mat, v.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) RawVector() blas64.Vector {
|
|
||||||
return v.mat
|
|
||||||
}
|
|
||||||
|
|
||||||
// CopyVec makes a copy of elements of a into the receiver. It is similar to the
|
|
||||||
// built-in copy; it copies as much as the overlap between the two vectors and
|
|
||||||
// returns the number of elements it copied.
|
|
||||||
func (v *Vector) CopyVec(a *Vector) int {
|
|
||||||
n := min(v.Len(), a.Len())
|
|
||||||
if v != a {
|
|
||||||
blas64.Copy(n, a.mat, v.mat)
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
// ScaleVec scales the vector a by alpha, placing the result in the receiver.
|
|
||||||
func (v *Vector) ScaleVec(alpha float64, a *Vector) {
|
|
||||||
n := a.Len()
|
|
||||||
if v != a {
|
|
||||||
v.reuseAs(n)
|
|
||||||
if v.mat.Inc == 1 && a.mat.Inc == 1 {
|
|
||||||
f64.ScalUnitaryTo(v.mat.Data, alpha, a.mat.Data)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f64.ScalIncTo(v.mat.Data, uintptr(v.mat.Inc),
|
|
||||||
alpha, a.mat.Data, uintptr(n), uintptr(a.mat.Inc))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if v.mat.Inc == 1 {
|
|
||||||
f64.ScalUnitary(alpha, v.mat.Data)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f64.ScalInc(alpha, v.mat.Data, uintptr(n), uintptr(v.mat.Inc))
|
|
||||||
}
|
|
||||||
|
|
||||||
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver.
|
|
||||||
func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) {
|
|
||||||
if alpha == 1 {
|
|
||||||
v.AddVec(a, b)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if alpha == -1 {
|
|
||||||
v.SubVec(a, b)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
ar := a.Len()
|
|
||||||
br := b.Len()
|
|
||||||
|
|
||||||
if ar != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
|
|
||||||
if v != a {
|
|
||||||
v.checkOverlap(a.mat)
|
|
||||||
}
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
v.reuseAs(ar)
|
|
||||||
|
|
||||||
switch {
|
|
||||||
case alpha == 0: // v <- a
|
|
||||||
v.CopyVec(a)
|
|
||||||
case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v
|
|
||||||
blas64.Scal(ar, alpha+1, v.mat)
|
|
||||||
case v == a && v != b: // v <- v + alpha * b
|
|
||||||
if v.mat.Inc == 1 && b.mat.Inc == 1 {
|
|
||||||
// Fast path for a common case.
|
|
||||||
f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
|
|
||||||
} else {
|
|
||||||
f64.AxpyInc(alpha, b.mat.Data, v.mat.Data,
|
|
||||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(v.mat.Inc), 0, 0)
|
|
||||||
}
|
|
||||||
default: // v <- a + alpha * b or v <- a + alpha * v
|
|
||||||
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
|
|
||||||
// Fast path for a common case.
|
|
||||||
f64.AxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
|
|
||||||
} else {
|
|
||||||
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
|
|
||||||
alpha, b.mat.Data, a.mat.Data,
|
|
||||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// AddVec adds the vectors a and b, placing the result in the receiver.
|
|
||||||
func (v *Vector) AddVec(a, b *Vector) {
|
|
||||||
ar := a.Len()
|
|
||||||
br := b.Len()
|
|
||||||
|
|
||||||
if ar != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
|
|
||||||
if v != a {
|
|
||||||
v.checkOverlap(a.mat)
|
|
||||||
}
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
v.reuseAs(ar)
|
|
||||||
|
|
||||||
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
|
|
||||||
// Fast path for a common case.
|
|
||||||
f64.AxpyUnitaryTo(v.mat.Data, 1, b.mat.Data, a.mat.Data)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
|
|
||||||
1, b.mat.Data, a.mat.Data,
|
|
||||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// SubVec subtracts the vector b from a, placing the result in the receiver.
|
|
||||||
func (v *Vector) SubVec(a, b *Vector) {
|
|
||||||
ar := a.Len()
|
|
||||||
br := b.Len()
|
|
||||||
|
|
||||||
if ar != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
|
|
||||||
if v != a {
|
|
||||||
v.checkOverlap(a.mat)
|
|
||||||
}
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
v.reuseAs(ar)
|
|
||||||
|
|
||||||
if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
|
|
||||||
// Fast path for a common case.
|
|
||||||
f64.AxpyUnitaryTo(v.mat.Data, -1, b.mat.Data, a.mat.Data)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f64.AxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
|
|
||||||
-1, b.mat.Data, a.mat.Data,
|
|
||||||
uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MulElemVec performs element-wise multiplication of a and b, placing the result
|
|
||||||
// in the receiver.
|
|
||||||
func (v *Vector) MulElemVec(a, b *Vector) {
|
|
||||||
ar := a.Len()
|
|
||||||
br := b.Len()
|
|
||||||
|
|
||||||
if ar != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
|
|
||||||
if v != a {
|
|
||||||
v.checkOverlap(a.mat)
|
|
||||||
}
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
v.reuseAs(ar)
|
|
||||||
|
|
||||||
amat, bmat := a.RawVector(), b.RawVector()
|
|
||||||
for i := 0; i < v.n; i++ {
|
|
||||||
v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] * bmat.Data[i*bmat.Inc]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// DivElemVec performs element-wise division of a by b, placing the result
|
|
||||||
// in the receiver.
|
|
||||||
func (v *Vector) DivElemVec(a, b *Vector) {
|
|
||||||
ar := a.Len()
|
|
||||||
br := b.Len()
|
|
||||||
|
|
||||||
if ar != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
|
|
||||||
if v != a {
|
|
||||||
v.checkOverlap(a.mat)
|
|
||||||
}
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
v.reuseAs(ar)
|
|
||||||
|
|
||||||
amat, bmat := a.RawVector(), b.RawVector()
|
|
||||||
for i := 0; i < v.n; i++ {
|
|
||||||
v.mat.Data[i*v.mat.Inc] = amat.Data[i*amat.Inc] / bmat.Data[i*bmat.Inc]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MulVec computes a * b. The result is stored into the receiver.
|
|
||||||
// MulVec panics if the number of columns in a does not equal the number of rows in b.
|
|
||||||
func (v *Vector) MulVec(a Matrix, b *Vector) {
|
|
||||||
r, c := a.Dims()
|
|
||||||
br := b.Len()
|
|
||||||
if c != br {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
|
|
||||||
if v != b {
|
|
||||||
v.checkOverlap(b.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
a, trans := untranspose(a)
|
|
||||||
ar, ac := a.Dims()
|
|
||||||
v.reuseAs(r)
|
|
||||||
var restore func()
|
|
||||||
if v == a {
|
|
||||||
v, restore = v.isolatedWorkspace(a.(*Vector))
|
|
||||||
defer restore()
|
|
||||||
} else if v == b {
|
|
||||||
v, restore = v.isolatedWorkspace(b)
|
|
||||||
defer restore()
|
|
||||||
}
|
|
||||||
|
|
||||||
switch a := a.(type) {
|
|
||||||
case *Vector:
|
|
||||||
if v != a {
|
|
||||||
v.checkOverlap(a.mat)
|
|
||||||
}
|
|
||||||
|
|
||||||
if a.Len() == 1 {
|
|
||||||
// {1,1} x {1,n}
|
|
||||||
av := a.At(0, 0)
|
|
||||||
for i := 0; i < b.Len(); i++ {
|
|
||||||
v.mat.Data[i*v.mat.Inc] = av * b.mat.Data[i*b.mat.Inc]
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if b.Len() == 1 {
|
|
||||||
// {1,n} x {1,1}
|
|
||||||
bv := b.At(0, 0)
|
|
||||||
for i := 0; i < a.Len(); i++ {
|
|
||||||
v.mat.Data[i*v.mat.Inc] = bv * a.mat.Data[i*a.mat.Inc]
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// {n,1} x {1,n}
|
|
||||||
var sum float64
|
|
||||||
for i := 0; i < c; i++ {
|
|
||||||
sum += a.At(i, 0) * b.At(i, 0)
|
|
||||||
}
|
|
||||||
v.SetVec(0, sum)
|
|
||||||
return
|
|
||||||
case RawSymmetricer:
|
|
||||||
amat := a.RawSymmetric()
|
|
||||||
blas64.Symv(1, amat, b.mat, 0, v.mat)
|
|
||||||
case RawTriangular:
|
|
||||||
v.CopyVec(b)
|
|
||||||
amat := a.RawTriangular()
|
|
||||||
ta := blas.NoTrans
|
|
||||||
if trans {
|
|
||||||
ta = blas.Trans
|
|
||||||
}
|
|
||||||
blas64.Trmv(ta, amat, v.mat)
|
|
||||||
case RawMatrixer:
|
|
||||||
amat := a.RawMatrix()
|
|
||||||
// We don't know that a is a *Dense, so make
|
|
||||||
// a temporary Dense to check overlap.
|
|
||||||
(&Dense{mat: amat}).checkOverlap(v.asGeneral())
|
|
||||||
t := blas.NoTrans
|
|
||||||
if trans {
|
|
||||||
t = blas.Trans
|
|
||||||
}
|
|
||||||
blas64.Gemv(t, 1, amat, b.mat, 0, v.mat)
|
|
||||||
default:
|
|
||||||
if trans {
|
|
||||||
col := make([]float64, ar)
|
|
||||||
for c := 0; c < ac; c++ {
|
|
||||||
for i := range col {
|
|
||||||
col[i] = a.At(i, c)
|
|
||||||
}
|
|
||||||
var f float64
|
|
||||||
for i, e := range col {
|
|
||||||
f += e * b.mat.Data[i*b.mat.Inc]
|
|
||||||
}
|
|
||||||
v.mat.Data[c*v.mat.Inc] = f
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
row := make([]float64, ac)
|
|
||||||
for r := 0; r < ar; r++ {
|
|
||||||
for i := range row {
|
|
||||||
row[i] = a.At(r, i)
|
|
||||||
}
|
|
||||||
var f float64
|
|
||||||
for i, e := range row {
|
|
||||||
f += e * b.mat.Data[i*b.mat.Inc]
|
|
||||||
}
|
|
||||||
v.mat.Data[r*v.mat.Inc] = f
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// reuseAs resizes an empty vector to a r×1 vector,
|
|
||||||
// or checks that a non-empty matrix is r×1.
|
|
||||||
func (v *Vector) reuseAs(r int) {
|
|
||||||
if v.isZero() {
|
|
||||||
v.mat = blas64.Vector{
|
|
||||||
Inc: 1,
|
|
||||||
Data: use(v.mat.Data, r),
|
|
||||||
}
|
|
||||||
v.n = r
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if r != v.n {
|
|
||||||
panic(matrix.ErrShape)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) isZero() bool {
|
|
||||||
// It must be the case that v.Dims() returns
|
|
||||||
// zeros in this case. See comment in Reset().
|
|
||||||
return v.mat.Inc == 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *Vector) isolatedWorkspace(a *Vector) (n *Vector, restore func()) {
|
|
||||||
l := a.Len()
|
|
||||||
n = getWorkspaceVec(l, false)
|
|
||||||
return n, func() {
|
|
||||||
v.CopyVec(n)
|
|
||||||
putWorkspaceVec(n)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// asDense returns a Dense representation of the receiver with the same
|
|
||||||
// underlying data.
|
|
||||||
func (v *Vector) asDense() *Dense {
|
|
||||||
return &Dense{
|
|
||||||
mat: v.asGeneral(),
|
|
||||||
capRows: v.n,
|
|
||||||
capCols: 1,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// asGeneral returns a blas64.General representation of the receiver with the
|
|
||||||
// same underlying data.
|
|
||||||
func (v *Vector) asGeneral() blas64.General {
|
|
||||||
return blas64.General{
|
|
||||||
Rows: v.n,
|
|
||||||
Cols: 1,
|
|
||||||
Stride: v.mat.Inc,
|
|
||||||
Data: v.mat.Data,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,118 +0,0 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package plotter
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"image/color"
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/gonum/plot"
|
|
||||||
"github.com/gonum/plot/vg"
|
|
||||||
"github.com/gonum/plot/vg/draw"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Bubbles implements the Plotter interface, drawing
|
|
||||||
// a bubble plot of x, y, z triples where the z value
|
|
||||||
// determines the radius of the bubble.
|
|
||||||
type Bubbles struct {
|
|
||||||
XYZs
|
|
||||||
|
|
||||||
// Color is the color of the bubbles.
|
|
||||||
color.Color
|
|
||||||
|
|
||||||
// MinRadius and MaxRadius give the minimum
|
|
||||||
// and maximum bubble radius respectively.
|
|
||||||
// The radii of each bubble is interpolated linearly
|
|
||||||
// between these two values.
|
|
||||||
MinRadius, MaxRadius vg.Length
|
|
||||||
|
|
||||||
// MinZ and MaxZ are the minimum and
|
|
||||||
// maximum Z values from the data.
|
|
||||||
MinZ, MaxZ float64
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewBubbles creates as new bubble plot plotter for
|
|
||||||
// the given data, with a minimum and maximum
|
|
||||||
// bubble radius.
|
|
||||||
func NewBubbles(xyz XYZer, min, max vg.Length) (*Bubbles, error) {
|
|
||||||
cpy, err := CopyXYZs(xyz)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if min > max {
|
|
||||||
return nil, errors.New("Min bubble radius is greater than the max radius")
|
|
||||||
}
|
|
||||||
minz := cpy[0].Z
|
|
||||||
maxz := cpy[0].Z
|
|
||||||
for _, d := range cpy {
|
|
||||||
minz = math.Min(minz, d.Z)
|
|
||||||
maxz = math.Max(maxz, d.Z)
|
|
||||||
}
|
|
||||||
return &Bubbles{
|
|
||||||
XYZs: cpy,
|
|
||||||
MinRadius: min,
|
|
||||||
MaxRadius: max,
|
|
||||||
MinZ: minz,
|
|
||||||
MaxZ: maxz,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Plot implements the Plot method of the plot.Plotter interface.
|
|
||||||
func (bs *Bubbles) Plot(c draw.Canvas, plt *plot.Plot) {
|
|
||||||
trX, trY := plt.Transforms(&c)
|
|
||||||
|
|
||||||
c.SetColor(bs.Color)
|
|
||||||
|
|
||||||
for _, d := range bs.XYZs {
|
|
||||||
x := trX(d.X)
|
|
||||||
y := trY(d.Y)
|
|
||||||
pt := vg.Point{X: x, Y: y}
|
|
||||||
if !c.Contains(pt) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
rad := bs.radius(d.Z)
|
|
||||||
|
|
||||||
// draw a circle centered at x, y
|
|
||||||
var p vg.Path
|
|
||||||
p.Move(vg.Point{X: x + rad, Y: y})
|
|
||||||
p.Arc(pt, rad, 0, 2*math.Pi)
|
|
||||||
p.Close()
|
|
||||||
c.Fill(p)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// radius returns the radius of a bubble by linear interpolation.
|
|
||||||
func (bs *Bubbles) radius(z float64) vg.Length {
|
|
||||||
rng := bs.MaxRadius - bs.MinRadius
|
|
||||||
if bs.MaxZ == bs.MinZ {
|
|
||||||
return rng/2 + bs.MinRadius
|
|
||||||
}
|
|
||||||
d := (z - bs.MinZ) / (bs.MaxZ - bs.MinZ)
|
|
||||||
return vg.Length(d)*rng + bs.MinRadius
|
|
||||||
}
|
|
||||||
|
|
||||||
// DataRange implements the DataRange method
|
|
||||||
// of the plot.DataRanger interface.
|
|
||||||
func (bs *Bubbles) DataRange() (xmin, xmax, ymin, ymax float64) {
|
|
||||||
return XYRange(XYValues{bs.XYZs})
|
|
||||||
}
|
|
||||||
|
|
||||||
// GlyphBoxes implements the GlyphBoxes method
|
|
||||||
// of the plot.GlyphBoxer interface.
|
|
||||||
func (bs *Bubbles) GlyphBoxes(plt *plot.Plot) []plot.GlyphBox {
|
|
||||||
boxes := make([]plot.GlyphBox, len(bs.XYZs))
|
|
||||||
for i, d := range bs.XYZs {
|
|
||||||
boxes[i].X = plt.X.Norm(d.X)
|
|
||||||
boxes[i].Y = plt.Y.Norm(d.Y)
|
|
||||||
r := bs.radius(d.Z)
|
|
||||||
boxes[i].Rectangle = vg.Rectangle{
|
|
||||||
Min: vg.Point{X: -r, Y: -r},
|
|
||||||
Max: vg.Point{X: +r, Y: +r},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return boxes
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
Copyright ©2013 The Gonum Authors. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the gonum project nor the names of its authors and
|
||||||
|
contributors may be used to endorse or promote products derived from this
|
||||||
|
software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
105
vendor/github.com/gonum/blas/blas.go → vendor/gonum.org/v1/gonum/blas/blas.go
generated
vendored
105
vendor/github.com/gonum/blas/blas.go → vendor/gonum.org/v1/gonum/blas/blas.go
generated
vendored
|
|
@ -1,110 +1,9 @@
|
||||||
// Copyright ©2013 The gonum Authors. All rights reserved.
|
// Copyright ©2013 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
/*
|
//go:generate ./conversions.bash
|
||||||
Package blas provides interfaces for the BLAS linear algebra standard.
|
|
||||||
|
|
||||||
All methods must perform appropriate parameter checking and panic if
|
|
||||||
provided parameters that do not conform to the requirements specified
|
|
||||||
by the BLAS standard.
|
|
||||||
|
|
||||||
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
|
|
||||||
|
|
||||||
This version is modified to remove the "order" option. All matrix operations are
|
|
||||||
on row-order matrices.
|
|
||||||
|
|
||||||
Level 1 BLAS
|
|
||||||
|
|
||||||
dim scalar vector vector scalars 5-element prefixes
|
|
||||||
struct
|
|
||||||
|
|
||||||
_rotg ( a, b ) S, D
|
|
||||||
_rotmg( d1, d2, a, b ) S, D
|
|
||||||
_rot ( n, x, incX, y, incY, c, s ) S, D
|
|
||||||
_rotm ( n, x, incX, y, incY, param ) S, D
|
|
||||||
_swap ( n, x, incX, y, incY ) S, D, C, Z
|
|
||||||
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
|
|
||||||
_copy ( n, x, incX, y, incY ) S, D, C, Z
|
|
||||||
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
|
|
||||||
_dot ( n, x, incX, y, incY ) S, D, Ds
|
|
||||||
_dotu ( n, x, incX, y, incY ) C, Z
|
|
||||||
_dotc ( n, x, incX, y, incY ) C, Z
|
|
||||||
__dot ( n, alpha, x, incX, y, incY ) Sds
|
|
||||||
_nrm2 ( n, x, incX ) S, D, Sc, Dz
|
|
||||||
_asum ( n, x, incX ) S, D, Sc, Dz
|
|
||||||
I_amax( n, x, incX ) s, d, c, z
|
|
||||||
|
|
||||||
Level 2 BLAS
|
|
||||||
|
|
||||||
options dim b-width scalar matrix vector scalar vector prefixes
|
|
||||||
|
|
||||||
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
|
||||||
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
|
||||||
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
|
||||||
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
|
||||||
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
|
|
||||||
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
|
||||||
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
|
||||||
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
|
|
||||||
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
|
||||||
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
|
||||||
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
|
||||||
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
|
||||||
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
|
||||||
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
|
||||||
|
|
||||||
options dim scalar vector vector matrix prefixes
|
|
||||||
|
|
||||||
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
|
|
||||||
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
|
||||||
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
|
||||||
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
|
|
||||||
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
|
|
||||||
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
|
||||||
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
|
|
||||||
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
|
|
||||||
_spr ( uplo, n, alpha, x, incX, ap ) S, D
|
|
||||||
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
|
|
||||||
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
|
|
||||||
|
|
||||||
Level 3 BLAS
|
|
||||||
|
|
||||||
options dim scalar matrix matrix scalar matrix prefixes
|
|
||||||
|
|
||||||
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
|
||||||
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
|
||||||
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
|
||||||
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
|
|
||||||
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
|
|
||||||
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
|
||||||
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
|
||||||
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
|
||||||
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
|
||||||
|
|
||||||
Meaning of prefixes
|
|
||||||
|
|
||||||
S - float32 C - complex64
|
|
||||||
D - float64 Z - complex128
|
|
||||||
|
|
||||||
Matrix types
|
|
||||||
|
|
||||||
GE - GEneral GB - General Band
|
|
||||||
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
|
|
||||||
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
|
|
||||||
TR - TRiangular TB - Triangular Band TP - Triangular Packed
|
|
||||||
|
|
||||||
Options
|
|
||||||
|
|
||||||
trans = NoTrans, Trans, ConjTrans
|
|
||||||
uplo = Upper, Lower
|
|
||||||
diag = Nonunit, Unit
|
|
||||||
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
|
|
||||||
|
|
||||||
For real matrices, Trans and ConjTrans have the same meaning.
|
|
||||||
For Hermitian matrices, trans = Trans is not allowed.
|
|
||||||
For complex symmetric matrices, trans = ConjTrans is not allowed.
|
|
||||||
*/
|
|
||||||
package blas
|
package blas
|
||||||
|
|
||||||
// Flag constants indicate Givens transformation H matrix state.
|
// Flag constants indicate Givens transformation H matrix state.
|
||||||
|
|
@ -1,16 +1,15 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
// Package blas64 provides a simple interface to the float64 BLAS API.
|
|
||||||
package blas64
|
package blas64
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/blas/native"
|
"gonum.org/v1/gonum/blas/gonum"
|
||||||
)
|
)
|
||||||
|
|
||||||
var blas64 blas.Float64 = native.Implementation{}
|
var blas64 blas.Float64 = gonum.Implementation{}
|
||||||
|
|
||||||
// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls.
|
// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls.
|
||||||
// The default implementation is native.Implementation.
|
// The default implementation is gonum.Implementation.
|
||||||
|
|
@ -0,0 +1,277 @@
|
||||||
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package blas64
|
||||||
|
|
||||||
|
import "gonum.org/v1/gonum/blas"
|
||||||
|
|
||||||
|
// GeneralCols represents a matrix using the conventional column-major storage scheme.
|
||||||
|
type GeneralCols General
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions as a and have adequate backing
|
||||||
|
// data storage.
|
||||||
|
func (t GeneralCols) From(a General) {
|
||||||
|
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if len(t.Data) < (t.Cols-1)*t.Stride+t.Rows {
|
||||||
|
panic("blas64: short data slice")
|
||||||
|
}
|
||||||
|
for i := 0; i < a.Rows; i++ {
|
||||||
|
for j, v := range a.Data[i*a.Stride : i*a.Stride+a.Cols] {
|
||||||
|
t.Data[i+j*t.Stride] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions as a and have adequate backing
|
||||||
|
// data storage.
|
||||||
|
func (t General) From(a GeneralCols) {
|
||||||
|
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if len(t.Data) < (t.Rows-1)*t.Stride+t.Cols {
|
||||||
|
panic("blas64: short data slice")
|
||||||
|
}
|
||||||
|
for j := 0; j < a.Cols; j++ {
|
||||||
|
for i, v := range a.Data[j*a.Stride : j*a.Stride+a.Rows] {
|
||||||
|
t.Data[i*t.Stride+j] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TriangularCols represents a matrix using the conventional column-major storage scheme.
|
||||||
|
type TriangularCols Triangular
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions, uplo and diag as a and have
|
||||||
|
// adequate backing data storage.
|
||||||
|
func (t TriangularCols) From(a Triangular) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
if t.Diag != a.Diag {
|
||||||
|
panic("blas64: mismatched BLAS diag")
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := i; j < a.N; j++ {
|
||||||
|
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case blas.Lower:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := 0; j <= i; j++ {
|
||||||
|
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case blas.All:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := 0; j < a.N; j++ {
|
||||||
|
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions, uplo and diag as a and have
|
||||||
|
// adequate backing data storage.
|
||||||
|
func (t Triangular) From(a TriangularCols) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
if t.Diag != a.Diag {
|
||||||
|
panic("blas64: mismatched BLAS diag")
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := i; j < a.N; j++ {
|
||||||
|
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case blas.Lower:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := 0; j <= i; j++ {
|
||||||
|
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case blas.All:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := 0; j < a.N; j++ {
|
||||||
|
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BandCols represents a matrix using the band column-major storage scheme.
|
||||||
|
type BandCols Band
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions and bandwidth as a and have
|
||||||
|
// adequate backing data storage.
|
||||||
|
func (t BandCols) From(a Band) {
|
||||||
|
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.KL != a.KL || t.KU != a.KU {
|
||||||
|
panic("blas64: mismatched bandwidth")
|
||||||
|
}
|
||||||
|
if a.Stride < a.KL+a.KU+1 {
|
||||||
|
panic("blas64: short stride for source")
|
||||||
|
}
|
||||||
|
if t.Stride < t.KL+t.KU+1 {
|
||||||
|
panic("blas64: short stride for destination")
|
||||||
|
}
|
||||||
|
for i := 0; i < a.Rows; i++ {
|
||||||
|
for j := max(0, i-a.KL); j < min(i+a.KU+1, a.Cols); j++ {
|
||||||
|
t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions and bandwidth as a and have
|
||||||
|
// adequate backing data storage.
|
||||||
|
func (t Band) From(a BandCols) {
|
||||||
|
if t.Rows != a.Rows || t.Cols != a.Cols {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.KL != a.KL || t.KU != a.KU {
|
||||||
|
panic("blas64: mismatched bandwidth")
|
||||||
|
}
|
||||||
|
if a.Stride < a.KL+a.KU+1 {
|
||||||
|
panic("blas64: short stride for source")
|
||||||
|
}
|
||||||
|
if t.Stride < t.KL+t.KU+1 {
|
||||||
|
panic("blas64: short stride for destination")
|
||||||
|
}
|
||||||
|
for j := 0; j < a.Cols; j++ {
|
||||||
|
for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
|
||||||
|
t.Data[j+a.KL-i+i*a.Stride] = a.Data[i+t.KU-j+j*t.Stride]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TriangularBandCols represents a symmetric matrix using the band column-major storage scheme.
|
||||||
|
type TriangularBandCols TriangularBand
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions, bandwidth and uplo as a and
|
||||||
|
// have adequate backing data storage.
|
||||||
|
func (t TriangularBandCols) From(a TriangularBand) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.K != a.K {
|
||||||
|
panic("blas64: mismatched bandwidth")
|
||||||
|
}
|
||||||
|
if a.Stride < a.K+1 {
|
||||||
|
panic("blas64: short stride for source")
|
||||||
|
}
|
||||||
|
if t.Stride < t.K+1 {
|
||||||
|
panic("blas64: short stride for destination")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
if t.Diag != a.Diag {
|
||||||
|
panic("blas64: mismatched BLAS diag")
|
||||||
|
}
|
||||||
|
dst := BandCols{
|
||||||
|
Rows: t.N, Cols: t.N,
|
||||||
|
Stride: t.Stride,
|
||||||
|
Data: t.Data,
|
||||||
|
}
|
||||||
|
src := Band{
|
||||||
|
Rows: a.N, Cols: a.N,
|
||||||
|
Stride: a.Stride,
|
||||||
|
Data: a.Data,
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
dst.KU = t.K
|
||||||
|
src.KU = a.K
|
||||||
|
case blas.Lower:
|
||||||
|
dst.KL = t.K
|
||||||
|
src.KL = a.K
|
||||||
|
}
|
||||||
|
dst.From(src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions, bandwidth and uplo as a and
|
||||||
|
// have adequate backing data storage.
|
||||||
|
func (t TriangularBand) From(a TriangularBandCols) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.K != a.K {
|
||||||
|
panic("blas64: mismatched bandwidth")
|
||||||
|
}
|
||||||
|
if a.Stride < a.K+1 {
|
||||||
|
panic("blas64: short stride for source")
|
||||||
|
}
|
||||||
|
if t.Stride < t.K+1 {
|
||||||
|
panic("blas64: short stride for destination")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
if t.Diag != a.Diag {
|
||||||
|
panic("blas64: mismatched BLAS diag")
|
||||||
|
}
|
||||||
|
dst := Band{
|
||||||
|
Rows: t.N, Cols: t.N,
|
||||||
|
Stride: t.Stride,
|
||||||
|
Data: t.Data,
|
||||||
|
}
|
||||||
|
src := BandCols{
|
||||||
|
Rows: a.N, Cols: a.N,
|
||||||
|
Stride: a.Stride,
|
||||||
|
Data: a.Data,
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
dst.KU = t.K
|
||||||
|
src.KU = a.K
|
||||||
|
case blas.Lower:
|
||||||
|
dst.KL = t.K
|
||||||
|
src.KL = a.K
|
||||||
|
}
|
||||||
|
dst.From(src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||||
|
|
||||||
|
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|
||||||
|
|
@ -0,0 +1,153 @@
|
||||||
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package blas64
|
||||||
|
|
||||||
|
import "gonum.org/v1/gonum/blas"
|
||||||
|
|
||||||
|
// SymmetricCols represents a matrix using the conventional column-major storage scheme.
|
||||||
|
type SymmetricCols Symmetric
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions and uplo as a and have adequate
|
||||||
|
// backing data storage.
|
||||||
|
func (t SymmetricCols) From(a Symmetric) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := i; j < a.N; j++ {
|
||||||
|
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case blas.Lower:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := 0; j <= i; j++ {
|
||||||
|
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions and uplo as a and have adequate
|
||||||
|
// backing data storage.
|
||||||
|
func (t Symmetric) From(a SymmetricCols) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := i; j < a.N; j++ {
|
||||||
|
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case blas.Lower:
|
||||||
|
for i := 0; i < a.N; i++ {
|
||||||
|
for j := 0; j <= i; j++ {
|
||||||
|
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme.
|
||||||
|
type SymmetricBandCols SymmetricBand
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions, bandwidth and uplo as a and
|
||||||
|
// have adequate backing data storage.
|
||||||
|
func (t SymmetricBandCols) From(a SymmetricBand) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.K != a.K {
|
||||||
|
panic("blas64: mismatched bandwidth")
|
||||||
|
}
|
||||||
|
if a.Stride < a.K+1 {
|
||||||
|
panic("blas64: short stride for source")
|
||||||
|
}
|
||||||
|
if t.Stride < t.K+1 {
|
||||||
|
panic("blas64: short stride for destination")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
dst := BandCols{
|
||||||
|
Rows: t.N, Cols: t.N,
|
||||||
|
Stride: t.Stride,
|
||||||
|
Data: t.Data,
|
||||||
|
}
|
||||||
|
src := Band{
|
||||||
|
Rows: a.N, Cols: a.N,
|
||||||
|
Stride: a.Stride,
|
||||||
|
Data: a.Data,
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
dst.KU = t.K
|
||||||
|
src.KU = a.K
|
||||||
|
case blas.Lower:
|
||||||
|
dst.KL = t.K
|
||||||
|
src.KL = a.K
|
||||||
|
}
|
||||||
|
dst.From(src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// From fills the receiver with elements from a. The receiver
|
||||||
|
// must have the same dimensions, bandwidth and uplo as a and
|
||||||
|
// have adequate backing data storage.
|
||||||
|
func (t SymmetricBand) From(a SymmetricBandCols) {
|
||||||
|
if t.N != a.N {
|
||||||
|
panic("blas64: mismatched dimension")
|
||||||
|
}
|
||||||
|
if t.K != a.K {
|
||||||
|
panic("blas64: mismatched bandwidth")
|
||||||
|
}
|
||||||
|
if a.Stride < a.K+1 {
|
||||||
|
panic("blas64: short stride for source")
|
||||||
|
}
|
||||||
|
if t.Stride < t.K+1 {
|
||||||
|
panic("blas64: short stride for destination")
|
||||||
|
}
|
||||||
|
if t.Uplo != a.Uplo {
|
||||||
|
panic("blas64: mismatched BLAS uplo")
|
||||||
|
}
|
||||||
|
dst := Band{
|
||||||
|
Rows: t.N, Cols: t.N,
|
||||||
|
Stride: t.Stride,
|
||||||
|
Data: t.Data,
|
||||||
|
}
|
||||||
|
src := BandCols{
|
||||||
|
Rows: a.N, Cols: a.N,
|
||||||
|
Stride: a.Stride,
|
||||||
|
Data: a.Data,
|
||||||
|
}
|
||||||
|
switch a.Uplo {
|
||||||
|
default:
|
||||||
|
panic("blas64: bad BLAS uplo")
|
||||||
|
case blas.Upper:
|
||||||
|
dst.KU = t.K
|
||||||
|
src.KU = a.K
|
||||||
|
case blas.Lower:
|
||||||
|
dst.KL = t.K
|
||||||
|
src.KL = a.K
|
||||||
|
}
|
||||||
|
dst.From(src)
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package blas64 provides a simple interface to the float64 BLAS API.
|
||||||
|
package blas64 // import "gonum.org/v1/gonum/blas/blas64"
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
/*
|
||||||
|
Package blas provides interfaces for the BLAS linear algebra standard.
|
||||||
|
|
||||||
|
All methods must perform appropriate parameter checking and panic if
|
||||||
|
provided parameters that do not conform to the requirements specified
|
||||||
|
by the BLAS standard.
|
||||||
|
|
||||||
|
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
|
||||||
|
|
||||||
|
This version is modified to remove the "order" option. All matrix operations are
|
||||||
|
on row-order matrices.
|
||||||
|
|
||||||
|
Level 1 BLAS
|
||||||
|
|
||||||
|
dim scalar vector vector scalars 5-element prefixes
|
||||||
|
struct
|
||||||
|
|
||||||
|
_rotg ( a, b ) S, D
|
||||||
|
_rotmg( d1, d2, a, b ) S, D
|
||||||
|
_rot ( n, x, incX, y, incY, c, s ) S, D
|
||||||
|
_rotm ( n, x, incX, y, incY, param ) S, D
|
||||||
|
_swap ( n, x, incX, y, incY ) S, D, C, Z
|
||||||
|
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
|
||||||
|
_copy ( n, x, incX, y, incY ) S, D, C, Z
|
||||||
|
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
|
||||||
|
_dot ( n, x, incX, y, incY ) S, D, Ds
|
||||||
|
_dotu ( n, x, incX, y, incY ) C, Z
|
||||||
|
_dotc ( n, x, incX, y, incY ) C, Z
|
||||||
|
__dot ( n, alpha, x, incX, y, incY ) Sds
|
||||||
|
_nrm2 ( n, x, incX ) S, D, Sc, Dz
|
||||||
|
_asum ( n, x, incX ) S, D, Sc, Dz
|
||||||
|
I_amax( n, x, incX ) s, d, c, z
|
||||||
|
|
||||||
|
Level 2 BLAS
|
||||||
|
|
||||||
|
options dim b-width scalar matrix vector scalar vector prefixes
|
||||||
|
|
||||||
|
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
||||||
|
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
|
||||||
|
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
||||||
|
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
|
||||||
|
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
|
||||||
|
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
||||||
|
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
|
||||||
|
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
|
||||||
|
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
||||||
|
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
||||||
|
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
||||||
|
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
|
||||||
|
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
|
||||||
|
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
|
||||||
|
|
||||||
|
options dim scalar vector vector matrix prefixes
|
||||||
|
|
||||||
|
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
|
||||||
|
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||||
|
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||||
|
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
|
||||||
|
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
|
||||||
|
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
|
||||||
|
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
|
||||||
|
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
|
||||||
|
_spr ( uplo, n, alpha, x, incX, ap ) S, D
|
||||||
|
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
|
||||||
|
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
|
||||||
|
|
||||||
|
Level 3 BLAS
|
||||||
|
|
||||||
|
options dim scalar matrix matrix scalar matrix prefixes
|
||||||
|
|
||||||
|
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||||
|
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||||
|
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
||||||
|
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
|
||||||
|
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
|
||||||
|
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
|
||||||
|
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
|
||||||
|
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
||||||
|
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
|
||||||
|
|
||||||
|
Meaning of prefixes
|
||||||
|
|
||||||
|
S - float32 C - complex64
|
||||||
|
D - float64 Z - complex128
|
||||||
|
|
||||||
|
Matrix types
|
||||||
|
|
||||||
|
GE - GEneral GB - General Band
|
||||||
|
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
|
||||||
|
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
|
||||||
|
TR - TRiangular TB - Triangular Band TP - Triangular Packed
|
||||||
|
|
||||||
|
Options
|
||||||
|
|
||||||
|
trans = NoTrans, Trans, ConjTrans
|
||||||
|
uplo = Upper, Lower
|
||||||
|
diag = Nonunit, Unit
|
||||||
|
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
|
||||||
|
|
||||||
|
For real matrices, Trans and ConjTrans have the same meaning.
|
||||||
|
For Hermitian matrices, trans = Trans is not allowed.
|
||||||
|
For complex symmetric matrices, trans = ConjTrans is not allowed.
|
||||||
|
*/
|
||||||
|
package blas // import "gonum.org/v1/gonum/blas"
|
||||||
|
|
@ -0,0 +1,185 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package gonum
|
||||||
|
|
||||||
|
import "gonum.org/v1/gonum/blas"
|
||||||
|
|
||||||
|
var (
|
||||||
|
_ blas.Complex64 = Implementation{}
|
||||||
|
_ blas.Complex128 = Implementation{}
|
||||||
|
)
|
||||||
|
|
||||||
|
// TODO(btracey): Replace this as complex routines are added, and instead
|
||||||
|
// automatically generate the complex64 routines from the complex128 ones.
|
||||||
|
|
||||||
|
var noComplex = "native: implementation does not implement this routine, see the cgo wrapper in gonum.org/v1/netlib/blas"
|
||||||
|
|
||||||
|
// Level 1 complex64 routines.
|
||||||
|
|
||||||
|
func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Scasum(n int, x []complex64, incX int) float32 {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Icamax(n int, x []complex64, incX int) int {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Level 2 complex64 routines.
|
||||||
|
|
||||||
|
func (Implementation) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cgbmv(tA blas.Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Chpmv(ul blas.Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Level 3 complex64 routines.
|
||||||
|
|
||||||
|
func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Level 2 complex128 routines.
|
||||||
|
|
||||||
|
func (Implementation) Zgbmv(tA blas.Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex128, x []complex128, incX int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Level 3 complex128 routines.
|
||||||
|
|
||||||
|
func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
func (Implementation) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) {
|
||||||
|
panic(noComplex)
|
||||||
|
}
|
||||||
|
|
@ -1,15 +1,15 @@
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"runtime"
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f64"
|
"gonum.org/v1/gonum/internal/asm/f64"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Dgemm computes
|
// Dgemm computes
|
||||||
|
|
@ -25,17 +25,17 @@ func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a
|
||||||
}
|
}
|
||||||
aTrans := tA == blas.Trans || tA == blas.ConjTrans
|
aTrans := tA == blas.Trans || tA == blas.ConjTrans
|
||||||
if aTrans {
|
if aTrans {
|
||||||
checkMatrix64(k, m, a, lda)
|
checkDMatrix('a', k, m, a, lda)
|
||||||
} else {
|
} else {
|
||||||
checkMatrix64(m, k, a, lda)
|
checkDMatrix('a', m, k, a, lda)
|
||||||
}
|
}
|
||||||
bTrans := tB == blas.Trans || tB == blas.ConjTrans
|
bTrans := tB == blas.Trans || tB == blas.ConjTrans
|
||||||
if bTrans {
|
if bTrans {
|
||||||
checkMatrix64(n, k, b, ldb)
|
checkDMatrix('b', n, k, b, ldb)
|
||||||
} else {
|
} else {
|
||||||
checkMatrix64(k, n, b, ldb)
|
checkDMatrix('b', k, n, b, ldb)
|
||||||
}
|
}
|
||||||
checkMatrix64(m, n, c, ldc)
|
checkDMatrix('c', m, n, c, ldc)
|
||||||
|
|
||||||
// scale c
|
// scale c
|
||||||
if beta != 1 {
|
if beta != 1 {
|
||||||
|
|
@ -121,7 +121,7 @@ func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []f
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
// Make local copies of otherwise global variables to reduce shared memory.
|
// Make local copies of otherwise global variables to reduce shared memory.
|
||||||
// This has a noticable effect on benchmarks in some cases.
|
// This has a noticeable effect on benchmarks in some cases.
|
||||||
alpha := alpha
|
alpha := alpha
|
||||||
aTrans := aTrans
|
aTrans := aTrans
|
||||||
bTrans := bTrans
|
bTrans := bTrans
|
||||||
|
|
@ -259,18 +259,3 @@ func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb i
|
||||||
func sliceView64(a []float64, lda, i, j, r, c int) []float64 {
|
func sliceView64(a []float64, lda, i, j, r, c int) []float64 {
|
||||||
return a[i*lda+j : (i+r-1)*lda+j+c]
|
return a[i*lda+j : (i+r-1)*lda+j+c]
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkMatrix64(m, n int, a []float64, lda int) {
|
|
||||||
if m < 0 {
|
|
||||||
panic("blas: rows < 0")
|
|
||||||
}
|
|
||||||
if n < 0 {
|
|
||||||
panic("blas: cols < 0")
|
|
||||||
}
|
|
||||||
if lda < n {
|
|
||||||
panic("blas: illegal stride")
|
|
||||||
}
|
|
||||||
if len(a) < (m-1)*lda+n {
|
|
||||||
panic("blas: insufficient matrix slice length")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -7,12 +7,12 @@
|
||||||
/*
|
/*
|
||||||
Package native is a Go implementation of the BLAS API. This implementation
|
Package gonum is a Go implementation of the BLAS API. This implementation
|
||||||
panics when the input arguments are invalid as per the standard, for example
|
panics when the input arguments are invalid as per the standard, for example
|
||||||
if a vector increment is zero. Please note that the treatment of NaN values
|
if a vector increment is zero. Note that the treatment of NaN values
|
||||||
is not specified, and differs among the BLAS implementations.
|
is not specified, and differs among the BLAS implementations.
|
||||||
github.com/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
|
gonum.org/v1/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
|
||||||
interface. The rest of this text describes the layout of the data for the input types.
|
interface. The rest of this text describes the layout of the data for the input types.
|
||||||
|
|
||||||
Please note that in the function documentation, x[i] refers to the i^th element
|
Note that in the function documentation, x[i] refers to the i^th element
|
||||||
of the vector, which will be different from the i^th element of the slice if
|
of the vector, which will be different from the i^th element of the slice if
|
||||||
incX != 1.
|
incX != 1.
|
||||||
|
|
||||||
|
|
@ -85,4 +85,4 @@ which is given to the BLAS routine as [∗ 1 2 3 4 ...].
|
||||||
See http://www.crest.iu.edu/research/mtl/reference/html/banded.html
|
See http://www.crest.iu.edu/research/mtl/reference/html/banded.html
|
||||||
for more information
|
for more information
|
||||||
*/
|
*/
|
||||||
package native
|
package gonum // import "gonum.org/v1/gonum/blas/gonum"
|
||||||
|
|
@ -0,0 +1,50 @@
|
||||||
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package gonum
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
)
|
||||||
|
|
||||||
|
// general64 is a float64 matrix held in a flat data slice together with its
// dimensions and stride.
type general64 struct {
	data       []float64
	rows, cols int
	stride     int
}

// clone returns a copy of g whose data slice is newly allocated, so that
// writes to the copy's data do not affect g.
func (g general64) clone() general64 {
	data := make([]float64, len(g.data))
	copy(data, g.data)
	return general64{
		data:   data,
		rows:   g.rows,
		cols:   g.cols,
		stride: g.stride,
	}
}

// equal reports whether g and a have the same rows, cols and stride and
// identical data slices. Data slices of different lengths are never equal;
// checking the length first also keeps the element loop from indexing past
// the end of a.data.
func (g general64) equal(a general64) bool {
	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
		return false
	}
	if len(g.data) != len(a.data) {
		return false
	}
	for i, v := range g.data {
		if a.data[i] != v {
			return false
		}
	}
	return true
}

// equalWithinAbs reports whether g and a have the same rows, cols and stride
// and data slices of the same length whose corresponding elements differ by
// at most tol in absolute value.
//
// Note that an element pair is rejected only when |a-g| > tol, so a NaN
// difference is not reported as a mismatch.
func (g general64) equalWithinAbs(a general64, tol float64) bool {
	if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
		return false
	}
	if len(g.data) != len(a.data) {
		return false
	}
	for i, v := range g.data {
		if math.Abs(a.data[i]-v) > tol {
			return false
		}
	}
	return true
}
|
||||||
|
|
@ -0,0 +1,52 @@
|
||||||
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package gonum
|
||||||
|
|
||||||
|
import (
|
||||||
|
math "gonum.org/v1/gonum/internal/math32"
|
||||||
|
)
|
||||||
|
|
||||||
|
type general32 struct {
|
||||||
|
data []float32
|
||||||
|
rows, cols int
|
||||||
|
stride int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g general32) clone() general32 {
|
||||||
|
data := make([]float32, len(g.data))
|
||||||
|
copy(data, g.data)
|
||||||
|
return general32{
|
||||||
|
data: data,
|
||||||
|
rows: g.rows,
|
||||||
|
cols: g.cols,
|
||||||
|
stride: g.stride,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g general32) equal(a general32) bool {
|
||||||
|
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, v := range g.data {
|
||||||
|
if a.data[i] != v {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g general32) equalWithinAbs(a general32, tol float32) bool {
|
||||||
|
if g.rows != a.rows || g.cols != a.cols || g.stride != a.stride {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, v := range g.data {
|
||||||
|
if math.Abs(a.data[i]-v) > tol {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:generate ./single_precision.bash
|
//go:generate ./single_precision.bash
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
|
import "math"
|
||||||
|
|
||||||
type Implementation struct{}
|
type Implementation struct{}
|
||||||
|
|
||||||
|
|
@ -43,9 +45,6 @@ const (
|
||||||
buffMul = 4 // how big is the buffer relative to the number of workers
|
buffMul = 4 // how big is the buffer relative to the number of workers
|
||||||
)
|
)
|
||||||
|
|
||||||
// [SD]gemm debugging constant.
|
|
||||||
const debug = false
|
|
||||||
|
|
||||||
// subMul is a common type shared by [SD]gemm.
|
// subMul is a common type shared by [SD]gemm.
|
||||||
type subMul struct {
|
type subMul struct {
|
||||||
i, j int // index of block
|
i, j int // index of block
|
||||||
|
|
@ -65,8 +64,70 @@ func min(a, b int) int {
|
||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
// blocks returns the number of divisons of the dimension length with the given
|
func checkSMatrix(name byte, m, n int, a []float32, lda int) {
|
||||||
|
if m < 0 {
|
||||||
|
panic(mLT0)
|
||||||
|
}
|
||||||
|
if n < 0 {
|
||||||
|
panic(nLT0)
|
||||||
|
}
|
||||||
|
if lda < n {
|
||||||
|
panic("blas: illegal stride of " + string(name))
|
||||||
|
}
|
||||||
|
if len(a) < (m-1)*lda+n {
|
||||||
|
panic("blas: index of " + string(name) + " out of range")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkDMatrix(name byte, m, n int, a []float64, lda int) {
|
||||||
|
if m < 0 {
|
||||||
|
panic(mLT0)
|
||||||
|
}
|
||||||
|
if n < 0 {
|
||||||
|
panic(nLT0)
|
||||||
|
}
|
||||||
|
if lda < n {
|
||||||
|
panic("blas: illegal stride of " + string(name))
|
||||||
|
}
|
||||||
|
if len(a) < (m-1)*lda+n {
|
||||||
|
panic("blas: index of " + string(name) + " out of range")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkZMatrix(name byte, m, n int, a []complex128, lda int) {
|
||||||
|
if m < 0 {
|
||||||
|
panic(mLT0)
|
||||||
|
}
|
||||||
|
if n < 0 {
|
||||||
|
panic(nLT0)
|
||||||
|
}
|
||||||
|
if lda < max(1, n) {
|
||||||
|
panic("blas: illegal stride of " + string(name))
|
||||||
|
}
|
||||||
|
if len(a) < (m-1)*lda+n {
|
||||||
|
panic("blas: insufficient " + string(name) + " matrix slice length")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkZVector(name byte, n int, x []complex128, incX int) {
|
||||||
|
if n < 0 {
|
||||||
|
panic(nLT0)
|
||||||
|
}
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||||
|
panic("blas: insufficient " + string(name) + " vector slice length")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// blocks returns the number of divisions of the dimension length with the given
|
||||||
// block size.
|
// block size.
|
||||||
func blocks(dim, bsize int) int {
|
func blocks(dim, bsize int) int {
|
||||||
return (dim + bsize - 1) / bsize
|
return (dim + bsize - 1) / bsize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// dcabs1 returns the sum of the absolute values of the real and imaginary
// parts of z, |real(z)|+|imag(z)|.
func dcabs1(z complex128) float64 {
	re, im := real(z), imag(z)
	return math.Abs(re) + math.Abs(im)
}
|
||||||
|
|
@ -0,0 +1,442 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package gonum
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"gonum.org/v1/gonum/internal/asm/c128"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Dzasum returns the sum of the absolute values of the elements of x
|
||||||
|
// \sum_i |Re(x[i])| + |Im(x[i])|
|
||||||
|
// Dzasum returns 0 if incX is negative.
|
||||||
|
func (Implementation) Dzasum(n int, x []complex128, incX int) float64 {
|
||||||
|
if n < 0 {
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if incX < 1 {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
var sum float64
|
||||||
|
if incX == 1 {
|
||||||
|
if len(x) < n {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
for _, v := range x[:n] {
|
||||||
|
sum += dcabs1(v)
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
if (n-1)*incX >= len(x) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
v := x[i*incX]
|
||||||
|
sum += dcabs1(v)
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dznrm2 computes the Euclidean norm of the complex vector x,
|
||||||
|
// ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])).
|
||||||
|
// This function returns 0 if incX is negative.
|
||||||
|
func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 {
|
||||||
|
if incX < 1 {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
if n == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if (n-1)*incX >= len(x) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
var (
|
||||||
|
scale float64
|
||||||
|
ssq float64 = 1
|
||||||
|
)
|
||||||
|
if incX == 1 {
|
||||||
|
for _, v := range x[:n] {
|
||||||
|
re, im := math.Abs(real(v)), math.Abs(imag(v))
|
||||||
|
if re != 0 {
|
||||||
|
if re > scale {
|
||||||
|
ssq = 1 + ssq*(scale/re)*(scale/re)
|
||||||
|
scale = re
|
||||||
|
} else {
|
||||||
|
ssq += (re / scale) * (re / scale)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if im != 0 {
|
||||||
|
if im > scale {
|
||||||
|
ssq = 1 + ssq*(scale/im)*(scale/im)
|
||||||
|
scale = im
|
||||||
|
} else {
|
||||||
|
ssq += (im / scale) * (im / scale)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if math.IsInf(scale, 1) {
|
||||||
|
return math.Inf(1)
|
||||||
|
}
|
||||||
|
return scale * math.Sqrt(ssq)
|
||||||
|
}
|
||||||
|
for ix := 0; ix < n*incX; ix += incX {
|
||||||
|
re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix]))
|
||||||
|
if re != 0 {
|
||||||
|
if re > scale {
|
||||||
|
ssq = 1 + ssq*(scale/re)*(scale/re)
|
||||||
|
scale = re
|
||||||
|
} else {
|
||||||
|
ssq += (re / scale) * (re / scale)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if im != 0 {
|
||||||
|
if im > scale {
|
||||||
|
ssq = 1 + ssq*(scale/im)*(scale/im)
|
||||||
|
scale = im
|
||||||
|
} else {
|
||||||
|
ssq += (im / scale) * (im / scale)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if math.IsInf(scale, 1) {
|
||||||
|
return math.Inf(1)
|
||||||
|
}
|
||||||
|
return scale * math.Sqrt(ssq)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Izamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|.
|
||||||
|
// Izamax returns -1 if n is 0 or incX is negative.
|
||||||
|
func (Implementation) Izamax(n int, x []complex128, incX int) int {
|
||||||
|
if incX < 1 {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
// Return invalid index.
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
if n == 0 {
|
||||||
|
// Return invalid index.
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if len(x) <= (n-1)*incX {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
idx := 0
|
||||||
|
max := dcabs1(x[0])
|
||||||
|
if incX == 1 {
|
||||||
|
for i, v := range x[1:n] {
|
||||||
|
absV := dcabs1(v)
|
||||||
|
if absV > max {
|
||||||
|
max = absV
|
||||||
|
idx = i + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return idx
|
||||||
|
}
|
||||||
|
ix := incX
|
||||||
|
for i := 1; i < n; i++ {
|
||||||
|
absV := dcabs1(x[ix])
|
||||||
|
if absV > max {
|
||||||
|
max = absV
|
||||||
|
idx = i
|
||||||
|
}
|
||||||
|
ix += incX
|
||||||
|
}
|
||||||
|
return idx
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zaxpy adds alpha times x to y:
|
||||||
|
// y[i] += alpha * x[i] for all i
|
||||||
|
func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
if incY == 0 {
|
||||||
|
panic(zeroIncY)
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
if n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
|
||||||
|
panic(badY)
|
||||||
|
}
|
||||||
|
if alpha == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if incX == 1 && incY == 1 {
|
||||||
|
c128.AxpyUnitary(alpha, x[:n], y[:n])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var ix, iy int
|
||||||
|
if incX < 0 {
|
||||||
|
ix = (1 - n) * incX
|
||||||
|
}
|
||||||
|
if incY < 0 {
|
||||||
|
iy = (1 - n) * incY
|
||||||
|
}
|
||||||
|
c128.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zcopy copies the vector x to vector y.
|
||||||
|
func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
if incY == 0 {
|
||||||
|
panic(zeroIncY)
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
if n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
|
||||||
|
panic(badY)
|
||||||
|
}
|
||||||
|
if incX == 1 && incY == 1 {
|
||||||
|
copy(y[:n], x[:n])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var ix, iy int
|
||||||
|
if incX < 0 {
|
||||||
|
ix = (-n + 1) * incX
|
||||||
|
}
|
||||||
|
if incY < 0 {
|
||||||
|
iy = (-n + 1) * incY
|
||||||
|
}
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
y[iy] = x[ix]
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zdotc computes the dot product
|
||||||
|
// x^H · y
|
||||||
|
// of two complex vectors x and y.
|
||||||
|
func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
if incY == 0 {
|
||||||
|
panic(zeroIncY)
|
||||||
|
}
|
||||||
|
if n <= 0 {
|
||||||
|
if n == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if incX == 1 && incY == 1 {
|
||||||
|
if len(x) < n {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if len(y) < n {
|
||||||
|
panic(badY)
|
||||||
|
}
|
||||||
|
return c128.DotcUnitary(x[:n], y[:n])
|
||||||
|
}
|
||||||
|
var ix, iy int
|
||||||
|
if incX < 0 {
|
||||||
|
ix = (-n + 1) * incX
|
||||||
|
}
|
||||||
|
if incY < 0 {
|
||||||
|
iy = (-n + 1) * incY
|
||||||
|
}
|
||||||
|
if ix >= len(x) || (n-1)*incX >= len(x) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if iy >= len(y) || (n-1)*incY >= len(y) {
|
||||||
|
panic(badY)
|
||||||
|
}
|
||||||
|
return c128.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zdotu computes the dot product
|
||||||
|
// x^T · y
|
||||||
|
// of two complex vectors x and y.
|
||||||
|
func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
if incY == 0 {
|
||||||
|
panic(zeroIncY)
|
||||||
|
}
|
||||||
|
if n <= 0 {
|
||||||
|
if n == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if incX == 1 && incY == 1 {
|
||||||
|
if len(x) < n {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if len(y) < n {
|
||||||
|
panic(badY)
|
||||||
|
}
|
||||||
|
return c128.DotuUnitary(x[:n], y[:n])
|
||||||
|
}
|
||||||
|
var ix, iy int
|
||||||
|
if incX < 0 {
|
||||||
|
ix = (-n + 1) * incX
|
||||||
|
}
|
||||||
|
if incY < 0 {
|
||||||
|
iy = (-n + 1) * incY
|
||||||
|
}
|
||||||
|
if ix >= len(x) || (n-1)*incX >= len(x) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if iy >= len(y) || (n-1)*incY >= len(y) {
|
||||||
|
panic(badY)
|
||||||
|
}
|
||||||
|
return c128.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zdscal scales the vector x by a real scalar alpha.
|
||||||
|
// Zdscal has no effect if incX < 0.
|
||||||
|
func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) {
|
||||||
|
if incX < 1 {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if (n-1)*incX >= len(x) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
if n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if alpha == 0 {
|
||||||
|
if incX == 1 {
|
||||||
|
x = x[:n]
|
||||||
|
for i := range x {
|
||||||
|
x[i] = 0
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for ix := 0; ix < n*incX; ix += incX {
|
||||||
|
x[ix] = 0
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if incX == 1 {
|
||||||
|
x = x[:n]
|
||||||
|
for i, v := range x {
|
||||||
|
x[i] = complex(alpha*real(v), alpha*imag(v))
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for ix := 0; ix < n*incX; ix += incX {
|
||||||
|
v := x[ix]
|
||||||
|
x[ix] = complex(alpha*real(v), alpha*imag(v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zscal scales the vector x by a complex scalar alpha.
|
||||||
|
// Zscal has no effect if incX < 0.
|
||||||
|
func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) {
|
||||||
|
if incX < 1 {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if (n-1)*incX >= len(x) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
if n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if alpha == 0 {
|
||||||
|
if incX == 1 {
|
||||||
|
x = x[:n]
|
||||||
|
for i := range x {
|
||||||
|
x[i] = 0
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for ix := 0; ix < n*incX; ix += incX {
|
||||||
|
x[ix] = 0
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if incX == 1 {
|
||||||
|
c128.ScalUnitary(alpha, x[:n])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c128.ScalInc(alpha, x, uintptr(n), uintptr(incX))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zswap exchanges the elements of two complex vectors x and y.
|
||||||
|
func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) {
|
||||||
|
if incX == 0 {
|
||||||
|
panic(zeroIncX)
|
||||||
|
}
|
||||||
|
if incY == 0 {
|
||||||
|
panic(zeroIncY)
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
if n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
panic(negativeN)
|
||||||
|
}
|
||||||
|
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
|
||||||
|
panic(badX)
|
||||||
|
}
|
||||||
|
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
|
||||||
|
panic(badY)
|
||||||
|
}
|
||||||
|
if incX == 1 && incY == 1 {
|
||||||
|
x = x[:n]
|
||||||
|
for i, v := range x {
|
||||||
|
x[i], y[i] = y[i], v
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var ix, iy int
|
||||||
|
if incX < 0 {
|
||||||
|
ix = (-n + 1) * incX
|
||||||
|
}
|
||||||
|
if incY < 0 {
|
||||||
|
iy = (-n + 1) * incY
|
||||||
|
}
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
x[ix], y[iy] = y[iy], x[ix]
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,14 +1,14 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f64"
|
"gonum.org/v1/gonum/internal/asm/f64"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ blas.Float64Level1 = Implementation{}
|
var _ blas.Float64Level1 = Implementation{}
|
||||||
|
|
@ -270,13 +270,7 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if incX == 1 && incY == 1 {
|
if incX == 1 && incY == 1 {
|
||||||
if len(x) < n {
|
f64.AxpyUnitary(alpha, x[:n], y[:n])
|
||||||
panic(badLenX)
|
|
||||||
}
|
|
||||||
if len(y) < n {
|
|
||||||
panic(badLenY)
|
|
||||||
}
|
|
||||||
f64.AxpyUnitaryTo(y, alpha, x[:n], y)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
var ix, iy int
|
var ix, iy int
|
||||||
|
|
@ -286,12 +280,6 @@ func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []flo
|
||||||
if incY < 0 {
|
if incY < 0 {
|
||||||
iy = (-n + 1) * incY
|
iy = (-n + 1) * incY
|
||||||
}
|
}
|
||||||
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
|
|
||||||
panic(badLenX)
|
|
||||||
}
|
|
||||||
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
|
|
||||||
panic(badLenY)
|
|
||||||
}
|
|
||||||
f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -565,7 +553,6 @@ func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int,
|
||||||
ix += incX
|
ix += incX
|
||||||
iy += incY
|
iy += incY
|
||||||
}
|
}
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dscal scales x by alpha.
|
// Dscal scales x by alpha.
|
||||||
|
|
@ -604,7 +591,5 @@ func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) {
|
||||||
f64.ScalUnitary(alpha, x[:n])
|
f64.ScalUnitary(alpha, x[:n])
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for ix := 0; ix < n*incX; ix += incX {
|
f64.ScalInc(alpha, x, uintptr(n), uintptr(incX))
|
||||||
x[ix] *= alpha
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/internal/asm/f64"
|
"gonum.org/v1/gonum/internal/asm/f64"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Ddot computes the dot product of the two vectors
|
// Ddot computes the dot product of the two vectors
|
||||||
|
|
@ -1,16 +1,16 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
math "github.com/gonum/blas/native/internal/math32"
|
math "gonum.org/v1/gonum/internal/math32"
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f32"
|
"gonum.org/v1/gonum/internal/asm/f32"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ blas.Float32Level1 = Implementation{}
|
var _ blas.Float32Level1 = Implementation{}
|
||||||
|
|
@ -284,13 +284,7 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if incX == 1 && incY == 1 {
|
if incX == 1 && incY == 1 {
|
||||||
if len(x) < n {
|
f32.AxpyUnitary(alpha, x[:n], y[:n])
|
||||||
panic(badLenX)
|
|
||||||
}
|
|
||||||
if len(y) < n {
|
|
||||||
panic(badLenY)
|
|
||||||
}
|
|
||||||
f32.AxpyUnitaryTo(y, alpha, x[:n], y)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
var ix, iy int
|
var ix, iy int
|
||||||
|
|
@ -300,12 +294,6 @@ func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []flo
|
||||||
if incY < 0 {
|
if incY < 0 {
|
||||||
iy = (-n + 1) * incY
|
iy = (-n + 1) * incY
|
||||||
}
|
}
|
||||||
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
|
|
||||||
panic(badLenX)
|
|
||||||
}
|
|
||||||
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
|
|
||||||
panic(badLenY)
|
|
||||||
}
|
|
||||||
f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -587,7 +575,6 @@ func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int,
|
||||||
ix += incX
|
ix += incX
|
||||||
iy += incY
|
iy += incY
|
||||||
}
|
}
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sscal scales x by alpha.
|
// Sscal scales x by alpha.
|
||||||
|
|
@ -628,7 +615,5 @@ func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) {
|
||||||
f32.ScalUnitary(alpha, x[:n])
|
f32.ScalUnitary(alpha, x[:n])
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for ix := 0; ix < n*incX; ix += incX {
|
f32.ScalInc(alpha, x, uintptr(n), uintptr(incX))
|
||||||
x[ix] *= alpha
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/internal/asm/f32"
|
"gonum.org/v1/gonum/internal/asm/f32"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Dsdot computes the dot product of the two vectors
|
// Dsdot computes the dot product of the two vectors
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/internal/asm/f32"
|
"gonum.org/v1/gonum/internal/asm/f32"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Sdot computes the dot product of the two vectors
|
// Sdot computes the dot product of the two vectors
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/internal/asm/f32"
|
"gonum.org/v1/gonum/internal/asm/f32"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Sdsdot computes the dot product of the two vectors plus a constant
|
// Sdsdot computes the dot product of the two vectors plus a constant
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,20 +1,20 @@
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f64"
|
"gonum.org/v1/gonum/internal/asm/f64"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ blas.Float64Level2 = Implementation{}
|
var _ blas.Float64Level2 = Implementation{}
|
||||||
|
|
||||||
// Dgemv computes
|
// Dgemv computes
|
||||||
// y = alpha * a * x + beta * y if tA = blas.NoTrans
|
// y = alpha * A * x + beta * y if tA = blas.NoTrans
|
||||||
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
|
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
|
||||||
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
|
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
|
||||||
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
|
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
|
||||||
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
|
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
|
||||||
panic(badTranspose)
|
panic(badTranspose)
|
||||||
|
|
@ -167,21 +167,14 @@ func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []f
|
||||||
x = x[:m]
|
x = x[:m]
|
||||||
y = y[:n]
|
y = y[:n]
|
||||||
for i, xv := range x {
|
for i, xv := range x {
|
||||||
tmp := alpha * xv
|
f64.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
|
||||||
if tmp != 0 {
|
|
||||||
atmp := a[i*lda : i*lda+n]
|
|
||||||
f64.AxpyUnitaryTo(atmp, tmp, y, atmp)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ix := kx
|
ix := kx
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < m; i++ {
|
||||||
tmp := alpha * x[ix]
|
f64.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
||||||
if tmp != 0 {
|
|
||||||
f64.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
|
||||||
}
|
|
||||||
ix += incX
|
ix += incX
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -230,7 +223,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
||||||
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
|
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
|
||||||
panic(badY)
|
panic(badY)
|
||||||
}
|
}
|
||||||
if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
||||||
panic(badLdA)
|
panic(badLdA)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -269,7 +262,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
||||||
if tA == blas.NoTrans {
|
if tA == blas.NoTrans {
|
||||||
iy := ky
|
iy := ky
|
||||||
if incX == 1 {
|
if incX == 1 {
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -284,7 +277,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -301,7 +294,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if incX == 1 {
|
if incX == 1 {
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -316,7 +309,7 @@ func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64,
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ix := kx
|
ix := kx
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -1530,7 +1523,6 @@ func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64,
|
||||||
ix += incX
|
ix += incX
|
||||||
iy += incY
|
iy += incY
|
||||||
}
|
}
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dsyr performs the rank-one update
|
// Dsyr performs the rank-one update
|
||||||
|
|
@ -1718,7 +1710,6 @@ func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, inc
|
||||||
ix += incX
|
ix += incX
|
||||||
iy += incY
|
iy += incY
|
||||||
}
|
}
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dtpsv solves
|
// Dtpsv solves
|
||||||
|
|
@ -1,22 +1,22 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f32"
|
"gonum.org/v1/gonum/internal/asm/f32"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ blas.Float32Level2 = Implementation{}
|
var _ blas.Float32Level2 = Implementation{}
|
||||||
|
|
||||||
// Sgemv computes
|
// Sgemv computes
|
||||||
// y = alpha * a * x + beta * y if tA = blas.NoTrans
|
// y = alpha * A * x + beta * y if tA = blas.NoTrans
|
||||||
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
|
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
|
||||||
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
|
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
|
||||||
//
|
//
|
||||||
// Float32 implementations are autogenerated and not directly tested.
|
// Float32 implementations are autogenerated and not directly tested.
|
||||||
func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
|
func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
|
||||||
|
|
@ -173,21 +173,14 @@ func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []f
|
||||||
x = x[:m]
|
x = x[:m]
|
||||||
y = y[:n]
|
y = y[:n]
|
||||||
for i, xv := range x {
|
for i, xv := range x {
|
||||||
tmp := alpha * xv
|
f32.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
|
||||||
if tmp != 0 {
|
|
||||||
atmp := a[i*lda : i*lda+n]
|
|
||||||
f32.AxpyUnitaryTo(atmp, tmp, y, atmp)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ix := kx
|
ix := kx
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < m; i++ {
|
||||||
tmp := alpha * x[ix]
|
f32.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
||||||
if tmp != 0 {
|
|
||||||
f32.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
|
|
||||||
}
|
|
||||||
ix += incX
|
ix += incX
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -238,7 +231,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
||||||
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
|
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
|
||||||
panic(badY)
|
panic(badY)
|
||||||
}
|
}
|
||||||
if lda*(m-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
|
||||||
panic(badLdA)
|
panic(badLdA)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -277,7 +270,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
||||||
if tA == blas.NoTrans {
|
if tA == blas.NoTrans {
|
||||||
iy := ky
|
iy := ky
|
||||||
if incX == 1 {
|
if incX == 1 {
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -292,7 +285,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -309,7 +302,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if incX == 1 {
|
if incX == 1 {
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -324,7 +317,7 @@ func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32,
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ix := kx
|
ix := kx
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < min(m, n+kL); i++ {
|
||||||
l := max(0, kL-i)
|
l := max(0, kL-i)
|
||||||
u := min(nCol, ld+kL-i)
|
u := min(nCol, ld+kL-i)
|
||||||
off := max(0, i-kL)
|
off := max(0, i-kL)
|
||||||
|
|
@ -1552,7 +1545,6 @@ func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32,
|
||||||
ix += incX
|
ix += incX
|
||||||
iy += incY
|
iy += incY
|
||||||
}
|
}
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ssyr performs the rank-one update
|
// Ssyr performs the rank-one update
|
||||||
|
|
@ -1744,7 +1736,6 @@ func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, inc
|
||||||
ix += incX
|
ix += incX
|
||||||
iy += incY
|
iy += incY
|
||||||
}
|
}
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stpsv solves
|
// Stpsv solves
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f64"
|
"gonum.org/v1/gonum/internal/asm/f64"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ blas.Float64Level3 = Implementation{}
|
var _ blas.Float64Level3 = Implementation{}
|
||||||
|
|
@ -1,14 +1,14 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f32"
|
"gonum.org/v1/gonum/internal/asm/f32"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ blas.Float32Level3 = Implementation{}
|
var _ blas.Float32Level3 = Implementation{}
|
||||||
|
|
@ -1,17 +1,17 @@
|
||||||
// Code generated by "go generate github.com/gonum/blas/native"; DO NOT EDIT.
|
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum"; DO NOT EDIT.
|
||||||
|
|
||||||
// Copyright ©2014 The gonum Authors. All rights reserved.
|
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package native
|
package gonum
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"runtime"
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
"gonum.org/v1/gonum/blas"
|
||||||
"github.com/gonum/internal/asm/f32"
|
"gonum.org/v1/gonum/internal/asm/f32"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Sgemm computes
|
// Sgemm computes
|
||||||
|
|
@ -29,17 +29,17 @@ func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a
|
||||||
}
|
}
|
||||||
aTrans := tA == blas.Trans || tA == blas.ConjTrans
|
aTrans := tA == blas.Trans || tA == blas.ConjTrans
|
||||||
if aTrans {
|
if aTrans {
|
||||||
checkMatrix32(k, m, a, lda)
|
checkSMatrix('a', k, m, a, lda)
|
||||||
} else {
|
} else {
|
||||||
checkMatrix32(m, k, a, lda)
|
checkSMatrix('a', m, k, a, lda)
|
||||||
}
|
}
|
||||||
bTrans := tB == blas.Trans || tB == blas.ConjTrans
|
bTrans := tB == blas.Trans || tB == blas.ConjTrans
|
||||||
if bTrans {
|
if bTrans {
|
||||||
checkMatrix32(n, k, b, ldb)
|
checkSMatrix('b', n, k, b, ldb)
|
||||||
} else {
|
} else {
|
||||||
checkMatrix32(k, n, b, ldb)
|
checkSMatrix('b', k, n, b, ldb)
|
||||||
}
|
}
|
||||||
checkMatrix32(m, n, c, ldc)
|
checkSMatrix('c', m, n, c, ldc)
|
||||||
|
|
||||||
// scale c
|
// scale c
|
||||||
if beta != 1 {
|
if beta != 1 {
|
||||||
|
|
@ -125,7 +125,7 @@ func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []f
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
// Make local copies of otherwise global variables to reduce shared memory.
|
// Make local copies of otherwise global variables to reduce shared memory.
|
||||||
// This has a noticable effect on benchmarks in some cases.
|
// This has a noticeable effect on benchmarks in some cases.
|
||||||
alpha := alpha
|
alpha := alpha
|
||||||
aTrans := aTrans
|
aTrans := aTrans
|
||||||
bTrans := bTrans
|
bTrans := bTrans
|
||||||
|
|
@ -263,18 +263,3 @@ func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb i
|
||||||
func sliceView32(a []float32, lda, i, j, r, c int) []float32 {
|
func sliceView32(a []float32, lda, i, j, r, c int) []float32 {
|
||||||
return a[i*lda+j : (i+r-1)*lda+j+c]
|
return a[i*lda+j : (i+r-1)*lda+j+c]
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkMatrix32(m, n int, a []float32, lda int) {
|
|
||||||
if m < 0 {
|
|
||||||
panic("blas: rows < 0")
|
|
||||||
}
|
|
||||||
if n < 0 {
|
|
||||||
panic("blas: cols < 0")
|
|
||||||
}
|
|
||||||
if lda < n {
|
|
||||||
panic("blas: illegal stride")
|
|
||||||
}
|
|
||||||
if len(a) < (m-1)*lda+n {
|
|
||||||
panic("blas: insufficient matrix slice length")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package floats provides a set of helper routines for dealing with slices
|
||||||
|
// of float64. The functions avoid allocations to allow for use within tight
|
||||||
|
// loops without garbage collection overhead.
|
||||||
|
//
|
||||||
|
// The convention used is that when a slice is being modified in place, it has
|
||||||
|
// the name dst.
|
||||||
|
package floats // import "gonum.org/v1/gonum/floats"
|
||||||
50
vendor/github.com/gonum/floats/floats.go → vendor/gonum.org/v1/gonum/floats/floats.go
generated
vendored
50
vendor/github.com/gonum/floats/floats.go → vendor/gonum.org/v1/gonum/floats/floats.go
generated
vendored
|
|
@ -2,20 +2,15 @@
|
||||||
// Use of this code is governed by a BSD-style
|
// Use of this code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file
|
// license that can be found in the LICENSE file
|
||||||
|
|
||||||
// Package floats provides a set of helper routines for dealing with slices
|
|
||||||
// of float64. The functions avoid allocations to allow for use within tight
|
|
||||||
// loops without garbage collection overhead.
|
|
||||||
//
|
|
||||||
// The convention used is that when a slice is being modified in place, it has
|
|
||||||
// the name dst.
|
|
||||||
package floats
|
package floats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"math"
|
"math"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
"github.com/gonum/internal/asm/f64"
|
"gonum.org/v1/gonum/internal/asm/f64"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Add adds, element-wise, the elements of s and dst, and stores in dst.
|
// Add adds, element-wise, the elements of s and dst, and stores in dst.
|
||||||
|
|
@ -89,8 +84,8 @@ func (a argsort) Swap(i, j int) {
|
||||||
a.inds[i], a.inds[j] = a.inds[j], a.inds[i]
|
a.inds[i], a.inds[j] = a.inds[j], a.inds[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Argsort sorts the elements of s while tracking their original order.
|
// Argsort sorts the elements of dst while tracking their original order.
|
||||||
// At the conclusion of Argsort, s will contain the original elements of s
|
// At the conclusion of Argsort, dst will contain the original elements of dst
|
||||||
// but sorted in increasing order, and inds will contain the original position
|
// but sorted in increasing order, and inds will contain the original position
|
||||||
// of the elements in the slice such that dst[i] = origDst[inds[i]].
|
// of the elements in the slice such that dst[i] = origDst[inds[i]].
|
||||||
// It panics if the lengths of dst and inds do not match.
|
// It panics if the lengths of dst and inds do not match.
|
||||||
|
|
@ -342,7 +337,6 @@ func EqualLengths(slices ...[]float64) bool {
|
||||||
// all of the found elements will be returned along with an error.
|
// all of the found elements will be returned along with an error.
|
||||||
// At the return of the function, the input inds will be in an undetermined state.
|
// At the return of the function, the input inds will be in an undetermined state.
|
||||||
func Find(inds []int, f func(float64) bool, s []float64, k int) ([]int, error) {
|
func Find(inds []int, f func(float64) bool, s []float64, k int) ([]int, error) {
|
||||||
|
|
||||||
// inds is also returned to allow for calling with nil
|
// inds is also returned to allow for calling with nil
|
||||||
|
|
||||||
// Reslice inds to have zero length
|
// Reslice inds to have zero length
|
||||||
|
|
@ -495,6 +489,29 @@ func MulTo(dst, s, t []float64) []float64 {
|
||||||
return dst
|
return dst
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
	// nanBits is the bit pattern tested for, and set, to mark a value
	// as a quiet NaN.
	nanBits = 0x7ff8000000000000
	// nanMask selects the high 13 bits of a float64 bit pattern; its
	// complement selects the low 51 payload bits.
	nanMask = 0xfff8000000000000
)

// NaNWith returns an IEEE 754 "quiet not-a-number" value carrying the
// payload given in the low 51 bits of payload. Higher bits of payload
// are ignored.
// The NaN returned by math.NaN has a bit pattern equal to NaNWith(1).
func NaNWith(payload uint64) float64 {
	bits := payload &^ nanMask
	bits |= nanBits
	return math.Float64frombits(bits)
}

// NaNPayload returns the low 51 bits of f as the payload of an IEEE 754
// "quiet not-a-number" and reports true. For values of f other than
// quiet-NaN, NaNPayload returns zero and false.
func NaNPayload(f float64) (payload uint64, ok bool) {
	bits := math.Float64bits(f)
	if bits&nanBits == nanBits {
		return bits &^ nanMask, true
	}
	return 0, false
}
|
||||||
|
|
||||||
// Nearest returns the index of the element in s
|
// Nearest returns the index of the element in s
|
||||||
// whose value is nearest to v. If several such
|
// whose value is nearest to v. If several such
|
||||||
// elements exist, the lowest index is returned.
|
// elements exist, the lowest index is returned.
|
||||||
|
|
@ -567,6 +584,19 @@ func Norm(s []float64, L float64) float64 {
|
||||||
return math.Pow(norm, 1/L)
|
return math.Pow(norm, 1/L)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ParseWithNA converts the string s to a float64 in v.
|
||||||
|
// If s equals missing, w is returned as 0, otherwise 1.
// When s equals missing, v is 0 and err is nil. When s differs from missing
// and cannot be parsed, w is 0 and v and err are the values returned by
// strconv.ParseFloat.
|
||||||
|
func ParseWithNA(s, missing string) (v, w float64, err error) {
|
||||||
|
if s == missing {
|
||||||
|
return 0, 0, nil
|
||||||
|
}
|
||||||
|
v, err = strconv.ParseFloat(s, 64)
|
||||||
|
if err == nil {
|
||||||
|
// w is set to 1 only for a successful parse.
w = 1
|
||||||
|
}
|
||||||
|
return v, w, err
|
||||||
|
}
|
||||||
|
|
||||||
// Prod returns the product of the elements of the slice.
|
// Prod returns the product of the elements of the slice.
|
||||||
// Returns 1 if len(s) = 0.
|
// Returns 1 if len(s) = 0.
|
||||||
func Prod(s []float64) float64 {
|
func Prod(s []float64) float64 {
|
||||||
|
|
@ -0,0 +1,134 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// MOVDDUP X2, X3
|
||||||
|
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||||
|
// MOVDDUP X4, X5
|
||||||
|
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||||
|
// MOVDDUP X6, X7
|
||||||
|
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||||
|
// MOVDDUP X8, X9
|
||||||
|
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||||
|
|
||||||
|
// ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||||
|
// ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||||
|
// ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||||
|
// ADDSUBPD X8, X9
|
||||||
|
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||||
|
|
||||||
|
// func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
// AxpyInc computes y[iy+i*incY] += alpha * x[ix+i*incX] for i in [0, n).
// axpyi_loop handles four elements per iteration; axpyi_tail handles the
// remaining n % 4 elements one at a time.
// Register contents in the comments below are written { high lane, low lane }.
|
||||||
|
TEXT ·AxpyInc(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+16(FP), SI // SI = &x
|
||||||
|
MOVQ y_base+40(FP), DI // DI = &y
|
||||||
|
MOVQ n+64(FP), CX // CX = n
|
||||||
|
CMPQ CX, $0 // if n==0 { return }
|
||||||
|
JE axpyi_end
|
||||||
|
MOVQ ix+88(FP), R8 // R8 = ix // Load the first index
|
||||||
|
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||||
|
MOVQ iy+96(FP), R9 // R9 = iy
|
||||||
|
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||||
|
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
|
||||||
|
LEAQ (DI)(R9*1), DI // DI = &(y[iy])
|
||||||
|
MOVQ DI, DX // DX = DI // Separate Read/Write pointers
|
||||||
|
MOVQ incX+72(FP), R8 // R8 = incX
|
||||||
|
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||||
|
MOVQ incY+80(FP), R9 // R9 = incY
|
||||||
|
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||||
|
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
|
||||||
|
MOVAPS X0, X1
|
||||||
|
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||||
|
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||||
|
MOVAPS X1, X11
|
||||||
|
MOVQ CX, BX // BX = n
|
||||||
|
ANDQ $3, CX // CX = n % 4
|
||||||
|
SHRQ $2, BX // BX = floor( n / 4 )
|
||||||
|
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
|
||||||
|
|
||||||
|
axpyi_loop: // do {
|
||||||
|
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVUPS (SI)(R8*1), X4
|
||||||
|
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||||
|
MOVUPS (SI), X6
|
||||||
|
MOVUPS (SI)(R8*1), X8
|
||||||
|
|
||||||
|
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3
|
||||||
|
MOVDDUP_X4_X5
|
||||||
|
MOVDDUP_X6_X7
|
||||||
|
MOVDDUP_X8_X9
|
||||||
|
|
||||||
|
// X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2
|
||||||
|
SHUFPD $0x3, X4, X4
|
||||||
|
SHUFPD $0x3, X6, X6
|
||||||
|
SHUFPD $0x3, X8, X8
|
||||||
|
|
||||||
|
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
MULPD X1, X2
|
||||||
|
MULPD X0, X3
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X10, X5
|
||||||
|
MULPD X1, X6
|
||||||
|
MULPD X0, X7
|
||||||
|
MULPD X11, X8
|
||||||
|
MULPD X10, X9
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DX), X3
|
||||||
|
ADDPD (DX)(R9*1), X5
|
||||||
|
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||||
|
ADDPD (DX), X7
|
||||||
|
ADDPD (DX)(R9*1), X9
|
||||||
|
MOVUPS X3, (DI) // y[i] = X_(i+1)
|
||||||
|
MOVUPS X5, (DI)(R9*1)
|
||||||
|
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
|
||||||
|
MOVUPS X7, (DI)
|
||||||
|
MOVUPS X9, (DI)(R9*1)
|
||||||
|
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||||
|
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||||
|
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
|
||||||
|
DECQ BX
|
||||||
|
JNZ axpyi_loop // } while --BX > 0
|
||||||
|
CMPQ CX, $0 // if CX == 0 { return }
|
||||||
|
JE axpyi_end
|
||||||
|
|
||||||
|
axpyi_tail: // do {
|
||||||
|
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DI), X3
|
||||||
|
MOVUPS X3, (DI) // y[i] = X_(i+1)
|
||||||
|
ADDQ R8, SI // SI = &(SI[incX])
|
||||||
|
ADDQ R9, DI // DI = &(DI[incY])
|
||||||
|
LOOP axpyi_tail // } while --CX > 0
|
||||||
|
|
||||||
|
axpyi_end:
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,141 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// MOVDDUP X2, X3
|
||||||
|
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||||
|
// MOVDDUP X4, X5
|
||||||
|
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||||
|
// MOVDDUP X6, X7
|
||||||
|
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||||
|
// MOVDDUP X8, X9
|
||||||
|
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||||
|
|
||||||
|
// ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||||
|
// ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||||
|
// ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||||
|
// ADDSUBPD X8, X9
|
||||||
|
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||||
|
|
||||||
|
// func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
// AxpyIncTo computes dst[idst+i*incDst] = alpha * x[ix+i*incX] + y[iy+i*incY]
// for i in [0, n). axpyi_loop handles four elements per iteration;
// axpyi_tail handles the remaining n % 4 elements one at a time.
// Register contents in the comments below are written { high lane, low lane }.
|
||||||
|
TEXT ·AxpyIncTo(SB), NOSPLIT, $0
|
||||||
|
MOVQ dst_base+0(FP), DI // DI = &dst
|
||||||
|
MOVQ x_base+56(FP), SI // SI = &x
|
||||||
|
MOVQ y_base+80(FP), DX // DX = &y
|
||||||
|
MOVQ n+104(FP), CX // CX = n
|
||||||
|
CMPQ CX, $0 // if n==0 { return }
|
||||||
|
JE axpyi_end
|
||||||
|
MOVQ ix+128(FP), R8 // R8 = ix // Load the first index
|
||||||
|
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||||
|
MOVQ iy+136(FP), R9 // R9 = iy
|
||||||
|
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||||
|
MOVQ idst+32(FP), R10 // R10 = idst
|
||||||
|
SHLQ $4, R10 // R10 *= sizeof(complex128)
|
||||||
|
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
|
||||||
|
LEAQ (DX)(R9*1), DX // DX = &(y[iy])
|
||||||
|
LEAQ (DI)(R10*1), DI // DI = &(dst[idst])
|
||||||
|
MOVQ incX+112(FP), R8 // R8 = incX
|
||||||
|
SHLQ $4, R8 // R8 *= sizeof(complex128)
|
||||||
|
MOVQ incY+120(FP), R9 // R9 = incY
|
||||||
|
SHLQ $4, R9 // R9 *= sizeof(complex128)
|
||||||
|
MOVQ incDst+24(FP), R10 // R10 = incDst
|
||||||
|
SHLQ $4, R10 // R10 *= sizeof(complex128)
|
||||||
|
MOVUPS alpha+40(FP), X0 // X0 = { imag(a), real(a) }
|
||||||
|
MOVAPS X0, X1
|
||||||
|
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||||
|
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||||
|
MOVAPS X1, X11
|
||||||
|
MOVQ CX, BX // BX = n
|
||||||
|
ANDQ $3, CX // CX = n % 4
|
||||||
|
SHRQ $2, BX // BX = floor( n / 4 )
|
||||||
|
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
|
||||||
|
|
||||||
|
axpyi_loop: // do {
|
||||||
|
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVUPS (SI)(R8*1), X4
|
||||||
|
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||||
|
|
||||||
|
MOVUPS (SI), X6
|
||||||
|
MOVUPS (SI)(R8*1), X8
|
||||||
|
|
||||||
|
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3
|
||||||
|
MOVDDUP_X4_X5
|
||||||
|
MOVDDUP_X6_X7
|
||||||
|
MOVDDUP_X8_X9
|
||||||
|
|
||||||
|
// X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2
|
||||||
|
SHUFPD $0x3, X4, X4
|
||||||
|
SHUFPD $0x3, X6, X6
|
||||||
|
SHUFPD $0x3, X8, X8
|
||||||
|
|
||||||
|
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
MULPD X1, X2
|
||||||
|
MULPD X0, X3
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X10, X5
|
||||||
|
MULPD X1, X6
|
||||||
|
MULPD X0, X7
|
||||||
|
MULPD X11, X8
|
||||||
|
MULPD X10, X9
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DX), X3
|
||||||
|
ADDPD (DX)(R9*1), X5
|
||||||
|
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||||
|
ADDPD (DX), X7
|
||||||
|
ADDPD (DX)(R9*1), X9
|
||||||
|
MOVUPS X3, (DI) // dst[i] = X_(i+1)
|
||||||
|
MOVUPS X5, (DI)(R10*1)
|
||||||
|
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
|
||||||
|
MOVUPS X7, (DI)
|
||||||
|
MOVUPS X9, (DI)(R10*1)
|
||||||
|
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
|
||||||
|
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
|
||||||
|
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
|
||||||
|
DECQ BX
|
||||||
|
JNZ axpyi_loop // } while --BX > 0
|
||||||
|
CMPQ CX, $0 // if CX == 0 { return }
|
||||||
|
JE axpyi_end
|
||||||
|
|
||||||
|
axpyi_tail: // do {
|
||||||
|
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DX), X3
|
||||||
|
MOVUPS X3, (DI) // dst[i] = X_(i+1)
|
||||||
|
ADDQ R8, SI // SI += incX
|
||||||
|
ADDQ R9, DX // DX += incY
|
||||||
|
ADDQ R10, DI // DI += incDst
|
||||||
|
LOOP axpyi_tail // } while --CX > 0
|
||||||
|
|
||||||
|
axpyi_end:
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,122 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// MOVDDUP X2, X3
|
||||||
|
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||||
|
// MOVDDUP X4, X5
|
||||||
|
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||||
|
// MOVDDUP X6, X7
|
||||||
|
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||||
|
// MOVDDUP X8, X9
|
||||||
|
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||||
|
|
||||||
|
// ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||||
|
// ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||||
|
// ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||||
|
// ADDSUBPD X8, X9
|
||||||
|
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||||
|
|
||||||
|
// func AxpyUnitary(alpha complex128, x, y []complex128)
// AxpyUnitary computes y[i] += alpha * x[i] for i in [0, min(len(x), len(y))).
// caxy_loop handles four elements per iteration; caxy_tail handles the
// remaining n % 4 elements one at a time. AX indexes in units of 8 bytes,
// so it advances by 2 per complex128 element.
// Register contents in the comments below are written { high lane, low lane }.
|
||||||
|
TEXT ·AxpyUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+16(FP), SI // SI = &x
|
||||||
|
MOVQ y_base+40(FP), DI // DI = &y
|
||||||
|
MOVQ x_len+24(FP), CX // CX = min( len(x), len(y) )
|
||||||
|
CMPQ y_len+48(FP), CX
|
||||||
|
CMOVQLE y_len+48(FP), CX
|
||||||
|
CMPQ CX, $0 // if CX == 0 { return }
|
||||||
|
JE caxy_end
|
||||||
|
PXOR X0, X0 // Clear work registers and cache-align loop
|
||||||
|
PXOR X1, X1
|
||||||
|
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
|
||||||
|
MOVAPS X0, X1
|
||||||
|
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||||
|
XORQ AX, AX // i = 0
|
||||||
|
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||||
|
MOVAPS X1, X11
|
||||||
|
MOVQ CX, BX // BX = n
|
||||||
|
ANDQ $3, CX // CX = n % 4
|
||||||
|
SHRQ $2, BX // BX = floor( n / 4 )
|
||||||
|
JZ caxy_tail // if BX == 0 { goto caxy_tail }
|
||||||
|
|
||||||
|
caxy_loop: // do {
|
||||||
|
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVUPS 16(SI)(AX*8), X4
|
||||||
|
MOVUPS 32(SI)(AX*8), X6
|
||||||
|
MOVUPS 48(SI)(AX*8), X8
|
||||||
|
|
||||||
|
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3
|
||||||
|
MOVDDUP_X4_X5
|
||||||
|
MOVDDUP_X6_X7
|
||||||
|
MOVDDUP_X8_X9
|
||||||
|
|
||||||
|
// X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2
|
||||||
|
SHUFPD $0x3, X4, X4
|
||||||
|
SHUFPD $0x3, X6, X6
|
||||||
|
SHUFPD $0x3, X8, X8
|
||||||
|
|
||||||
|
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
MULPD X1, X2
|
||||||
|
MULPD X0, X3
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X10, X5
|
||||||
|
MULPD X1, X6
|
||||||
|
MULPD X0, X7
|
||||||
|
MULPD X11, X8
|
||||||
|
MULPD X10, X9
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DI)(AX*8), X3
|
||||||
|
ADDPD 16(DI)(AX*8), X5
|
||||||
|
ADDPD 32(DI)(AX*8), X7
|
||||||
|
ADDPD 48(DI)(AX*8), X9
|
||||||
|
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||||
|
MOVUPS X5, 16(DI)(AX*8)
|
||||||
|
MOVUPS X7, 32(DI)(AX*8)
|
||||||
|
MOVUPS X9, 48(DI)(AX*8)
|
||||||
|
ADDQ $8, AX // i += 8
|
||||||
|
DECQ BX
|
||||||
|
JNZ caxy_loop // } while --BX > 0
|
||||||
|
CMPQ CX, $0 // if CX == 0 { return }
|
||||||
|
JE caxy_end
|
||||||
|
|
||||||
|
caxy_tail: // do {
|
||||||
|
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DI)(AX*8), X3
|
||||||
|
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
|
||||||
|
ADDQ $2, AX // i += 2
|
||||||
|
LOOP caxy_tail // } while --CX > 0
|
||||||
|
|
||||||
|
caxy_end:
|
||||||
|
RET
|
||||||
123
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
generated
vendored
Normal file
123
vendor/gonum.org/v1/gonum/internal/asm/c128/axpyunitaryto_amd64.s
generated
vendored
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// MOVDDUP X2, X3
|
||||||
|
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
|
||||||
|
// MOVDDUP X4, X5
|
||||||
|
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
|
||||||
|
// MOVDDUP X6, X7
|
||||||
|
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
|
||||||
|
// MOVDDUP X8, X9
|
||||||
|
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
|
||||||
|
|
||||||
|
// ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
|
||||||
|
// ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
|
||||||
|
// ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
|
||||||
|
// ADDSUBPD X8, X9
|
||||||
|
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
|
||||||
|
|
||||||
|
// func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
// AxpyUnitaryTo computes dst[i] = alpha * x[i] + y[i] for i in
// [0, min(len(x), len(y), len(dst))). alpha occupies 16 bytes of the frame
// (offsets 24 to 40), i.e. it is a complex128.
// caxy_loop handles four elements per iteration; caxy_tail handles the
// remaining n % 4 elements one at a time. AX indexes in units of 8 bytes,
// so it advances by 2 per complex128 element.
// Register contents in the comments below are written { high lane, low lane }.
|
||||||
|
TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0
|
||||||
|
MOVQ dst_base+0(FP), DI // DI = &dst
|
||||||
|
MOVQ x_base+40(FP), SI // SI = &x
|
||||||
|
MOVQ y_base+64(FP), DX // DX = &y
|
||||||
|
MOVQ x_len+48(FP), CX // CX = min( len(x), len(y), len(dst) )
|
||||||
|
CMPQ y_len+72(FP), CX
|
||||||
|
CMOVQLE y_len+72(FP), CX
|
||||||
|
CMPQ dst_len+8(FP), CX
|
||||||
|
CMOVQLE dst_len+8(FP), CX
|
||||||
|
CMPQ CX, $0 // if CX == 0 { return }
|
||||||
|
JE caxy_end
|
||||||
|
MOVUPS alpha+24(FP), X0 // X0 = { imag(a), real(a) }
|
||||||
|
MOVAPS X0, X1
|
||||||
|
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
|
||||||
|
XORQ AX, AX // i = 0
|
||||||
|
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
|
||||||
|
MOVAPS X1, X11
|
||||||
|
MOVQ CX, BX // BX = n
|
||||||
|
ANDQ $3, CX // CX = n % 4
|
||||||
|
SHRQ $2, BX // BX = floor( n / 4 )
|
||||||
|
JZ caxy_tail // if BX == 0 { goto caxy_tail }
|
||||||
|
|
||||||
|
caxy_loop: // do {
|
||||||
|
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVUPS 16(SI)(AX*8), X4
|
||||||
|
MOVUPS 32(SI)(AX*8), X6
|
||||||
|
MOVUPS 48(SI)(AX*8), X8
|
||||||
|
|
||||||
|
// X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3 // duplicate real elements (xr, xr)
|
||||||
|
MOVDDUP_X4_X5
|
||||||
|
MOVDDUP_X6_X7
|
||||||
|
MOVDDUP_X8_X9
|
||||||
|
|
||||||
|
// X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2 // duplicate imag elements (xi, xi)
|
||||||
|
SHUFPD $0x3, X4, X4
|
||||||
|
SHUFPD $0x3, X6, X6
|
||||||
|
SHUFPD $0x3, X8, X8
|
||||||
|
|
||||||
|
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
MULPD X1, X2
|
||||||
|
MULPD X0, X3
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X10, X5
|
||||||
|
MULPD X1, X6
|
||||||
|
MULPD X0, X7
|
||||||
|
MULPD X11, X8
|
||||||
|
MULPD X10, X9
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DX)(AX*8), X3
|
||||||
|
ADDPD 16(DX)(AX*8), X5
|
||||||
|
ADDPD 32(DX)(AX*8), X7
|
||||||
|
ADDPD 48(DX)(AX*8), X9
|
||||||
|
MOVUPS X3, (DI)(AX*8) // dst[i] = X_(i+1)
|
||||||
|
MOVUPS X5, 16(DI)(AX*8)
|
||||||
|
MOVUPS X7, 32(DI)(AX*8)
|
||||||
|
MOVUPS X9, 48(DI)(AX*8)
|
||||||
|
ADDQ $8, AX // i += 8
|
||||||
|
DECQ BX
|
||||||
|
JNZ caxy_loop // } while --BX > 0
|
||||||
|
CMPQ CX, $0 // if CX == 0 { return }
|
||||||
|
JE caxy_end
|
||||||
|
|
||||||
|
caxy_tail: // Same calculation, but read in values to avoid trampling memory
|
||||||
|
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
|
||||||
|
ADDPD (DX)(AX*8), X3
|
||||||
|
MOVUPS X3, (DI)(AX*8) // dst[i] = X_(i+1)
|
||||||
|
ADDQ $2, AX // i += 2
|
||||||
|
LOOP caxy_tail // } while --CX > 0
|
||||||
|
|
||||||
|
caxy_end:
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package c128 provides complex128 vector primitives.
|
||||||
|
package c128 // import "gonum.org/v1/gonum/internal/asm/c128"
|
||||||
|
|
@ -0,0 +1,153 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
|
||||||
|
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
|
||||||
|
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
|
||||||
|
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
|
||||||
|
|
||||||
|
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
|
||||||
|
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
|
||||||
|
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
|
||||||
|
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
|
||||||
|
|
||||||
|
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
#define INC_X R8
|
||||||
|
#define INCx3_X R9
|
||||||
|
#define INC_Y R10
|
||||||
|
#define INCx3_Y R11
|
||||||
|
#define NEG1 X15
|
||||||
|
#define P_NEG1 X14
|
||||||
|
|
||||||
|
// func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
// DotcInc returns the sum over i in [0, n) of conj(x[ix+i*incX]) * y[iy+i*incY].
// dot_loop handles four elements per iteration, accumulating into SUM and
// P_SUM alternately; dot_tail handles the remaining n % 4 elements.
// Register contents in the comments below are written { high lane, low lane }.
|
||||||
|
TEXT ·DotcInc(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
MOVQ n+48(FP), LEN // LEN = n
|
||||||
|
PXOR SUM, SUM // SUM = 0
|
||||||
|
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
PXOR P_SUM, P_SUM // P_SUM = 0
|
||||||
|
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
|
||||||
|
SHLQ $4, INC_X
|
||||||
|
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
|
||||||
|
SHLQ $4, INC_Y
|
||||||
|
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
|
||||||
|
MOVQ incX+56(FP), INC_X // INC_X = incX
|
||||||
|
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
|
||||||
|
MOVQ incY+64(FP), INC_Y // INC_Y = incY
|
||||||
|
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
|
||||||
|
MOVSD $(-1.0), NEG1
|
||||||
|
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $3, TAIL // TAIL = n % 4
|
||||||
|
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||||
|
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||||
|
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
|
||||||
|
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
|
||||||
|
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
|
||||||
|
|
||||||
|
dot_loop: // do {
|
||||||
|
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_XPTR_INCX__X5
|
||||||
|
MOVDDUP_XPTR_INCX_2__X7
|
||||||
|
MOVDDUP_XPTR_INCx3X__X9
|
||||||
|
|
||||||
|
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MOVDDUP_8_XPTR_INCX__X4
|
||||||
|
MOVDDUP_8_XPTR_INCX_2__X6
|
||||||
|
MOVDDUP_8_XPTR_INCx3X__X8
|
||||||
|
|
||||||
|
// X_i = { -imag(x[i]), -imag(x[i]) }
|
||||||
|
MULPD NEG1, X2
|
||||||
|
MULPD P_NEG1, X4
|
||||||
|
MULPD NEG1, X6
|
||||||
|
MULPD P_NEG1, X8
|
||||||
|
|
||||||
|
// X_j = { imag(y[i]), real(y[i]) }
|
||||||
|
MOVUPS (Y_PTR), X10
|
||||||
|
MOVUPS (Y_PTR)(INC_Y*1), X11
|
||||||
|
MOVUPS (Y_PTR)(INC_Y*2), X12
|
||||||
|
MOVUPS (Y_PTR)(INCx3_Y*1), X13
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
|
||||||
|
MULPD X10, X3
|
||||||
|
MULPD X11, X5
|
||||||
|
MULPD X12, X7
|
||||||
|
MULPD X13, X9
|
||||||
|
|
||||||
|
// X_j = { real(y[i]), imag(y[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10
|
||||||
|
SHUFPD $0x1, X11, X11
|
||||||
|
SHUFPD $0x1, X12, X12
|
||||||
|
SHUFPD $0x1, X13, X13
|
||||||
|
|
||||||
|
// X_i = { real(y[i]) * -imag(x[i]), imag(y[i]) * -imag(x[i]) }
|
||||||
|
MULPD X10, X2
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X12, X6
|
||||||
|
MULPD X13, X8
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(y[i])*real(x[i]) - real(y[i])*imag(x[i]),
|
||||||
|
// real(result[i]): real(y[i])*real(x[i]) + imag(y[i])*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// sum += result[i] and psum += result[i], alternating between the two accumulators
|
||||||
|
ADDPD X3, SUM
|
||||||
|
ADDPD X5, P_SUM
|
||||||
|
ADDPD X7, SUM
|
||||||
|
ADDPD X9, P_SUM
|
||||||
|
|
||||||
|
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
|
||||||
|
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
ADDPD P_SUM, SUM // sum += psum
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail: // do {
|
||||||
|
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
|
||||||
|
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||||
|
MULPD X10, X3 // X_(i+1) = { imag(y[i]) * real(x[i]), real(y[i]) * real(x[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||||
|
MULPD X10, X2 // X_i = { real(y[i]) * -imag(x[i]), imag(y[i]) * -imag(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(y[i])*real(x[i]) - real(y[i])*imag(x[i]),
|
||||||
|
// real(result[i]): real(y[i])*real(x[i]) + imag(y[i])*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDPD X3, SUM // sum += result[i]
|
||||||
|
ADDQ INC_X, X_PTR // X_PTR += incX
|
||||||
|
ADDQ INC_Y, Y_PTR // Y_PTR += incY
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ dot_tail // } while --TAIL > 0
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
MOVUPS SUM, sum+88(FP)
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
|
||||||
|
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
|
||||||
|
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
|
||||||
|
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
|
||||||
|
|
||||||
|
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
|
||||||
|
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
|
||||||
|
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
|
||||||
|
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
|
||||||
|
|
||||||
|
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
#define IDX AX
|
||||||
|
#define I_IDX DX
|
||||||
|
#define NEG1 X15
|
||||||
|
#define P_NEG1 X14
|
||||||
|
|
||||||
|
// func DotcUnitary(x, y []complex128) (sum complex128)
|
||||||
|
TEXT ·DotcUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||||
|
CMPQ y_len+32(FP), LEN
|
||||||
|
CMOVQLE y_len+32(FP), LEN
|
||||||
|
PXOR SUM, SUM // sum = 0
|
||||||
|
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
XORPS P_SUM, P_SUM // psum = 0
|
||||||
|
MOVSD $(-1.0), NEG1
|
||||||
|
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
|
||||||
|
XORQ IDX, IDX // i := 0
|
||||||
|
MOVQ $1, I_IDX // j := 1
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $3, TAIL // TAIL = floor( TAIL / 4 )
|
||||||
|
SHRQ $2, LEN // LEN = TAIL % 4
|
||||||
|
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||||
|
|
||||||
|
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
|
||||||
|
|
||||||
|
dot_loop: // do {
|
||||||
|
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
MOVDDUP_16_XPTR_IDX_8__X5
|
||||||
|
MOVDDUP_32_XPTR_IDX_8__X7
|
||||||
|
MOVDDUP_48_XPTR_IDX_8__X9
|
||||||
|
|
||||||
|
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MOVDDUP_16_XPTR_IIDX_8__X4
|
||||||
|
MOVDDUP_32_XPTR_IIDX_8__X6
|
||||||
|
MOVDDUP_48_XPTR_IIDX_8__X8
|
||||||
|
|
||||||
|
// X_i = { -imag(x[i]), -imag(x[i]) }
|
||||||
|
MULPD NEG1, X2
|
||||||
|
MULPD P_NEG1, X4
|
||||||
|
MULPD NEG1, X6
|
||||||
|
MULPD P_NEG1, X8
|
||||||
|
|
||||||
|
// X_j = { imag(y[i]), real(y[i]) }
|
||||||
|
MOVUPS (Y_PTR)(IDX*8), X10
|
||||||
|
MOVUPS 16(Y_PTR)(IDX*8), X11
|
||||||
|
MOVUPS 32(Y_PTR)(IDX*8), X12
|
||||||
|
MOVUPS 48(Y_PTR)(IDX*8), X13
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
MULPD X10, X3
|
||||||
|
MULPD X11, X5
|
||||||
|
MULPD X12, X7
|
||||||
|
MULPD X13, X9
|
||||||
|
|
||||||
|
// X_j = { real(y[i]), imag(y[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10
|
||||||
|
SHUFPD $0x1, X11, X11
|
||||||
|
SHUFPD $0x1, X12, X12
|
||||||
|
SHUFPD $0x1, X13, X13
|
||||||
|
|
||||||
|
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
MULPD X10, X2
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X12, X6
|
||||||
|
MULPD X13, X8
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// psum += result[i]
|
||||||
|
ADDPD X3, SUM
|
||||||
|
ADDPD X5, P_SUM
|
||||||
|
ADDPD X7, SUM
|
||||||
|
ADDPD X9, P_SUM
|
||||||
|
|
||||||
|
ADDQ $8, IDX // IDX += 8
|
||||||
|
ADDQ $8, I_IDX // I_IDX += 8
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
ADDPD P_SUM, SUM // sum += psum
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail: // do {
|
||||||
|
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]) , real(x[i]) }
|
||||||
|
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
|
||||||
|
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
|
||||||
|
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||||
|
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||||
|
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDPD X3, SUM // SUM += result[i]
|
||||||
|
ADDQ $2, IDX // IDX += 2
|
||||||
|
ADDQ $2, I_IDX // I_IDX += 2
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ dot_tail // } while --TAIL > 0
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
MOVUPS SUM, sum+48(FP)
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,141 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
|
||||||
|
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
|
||||||
|
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
|
||||||
|
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
|
||||||
|
|
||||||
|
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
|
||||||
|
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
|
||||||
|
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
|
||||||
|
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
|
||||||
|
|
||||||
|
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
#define INC_X R8
|
||||||
|
#define INCx3_X R9
|
||||||
|
#define INC_Y R10
|
||||||
|
#define INCx3_Y R11
|
||||||
|
|
||||||
|
// func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||||
|
TEXT ·DotuInc(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
MOVQ n+48(FP), LEN // LEN = n
|
||||||
|
PXOR SUM, SUM // sum = 0
|
||||||
|
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
|
||||||
|
SHLQ $4, INC_X
|
||||||
|
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
|
||||||
|
SHLQ $4, INC_Y
|
||||||
|
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
|
||||||
|
MOVQ incX+56(FP), INC_X // INC_X = incX
|
||||||
|
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
|
||||||
|
MOVQ incY+64(FP), INC_Y // INC_Y = incY
|
||||||
|
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $3, TAIL // TAIL = LEN % 4
|
||||||
|
SHRQ $2, LEN // LEN = floor( LEN / 4 )
|
||||||
|
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||||
|
PXOR P_SUM, P_SUM // psum = 0
|
||||||
|
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
|
||||||
|
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
|
||||||
|
|
||||||
|
dot_loop: // do {
|
||||||
|
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
MOVDDUP_XPTR_INCX__X5
|
||||||
|
MOVDDUP_XPTR_INCX_2__X7
|
||||||
|
MOVDDUP_XPTR_INCx3X__X9
|
||||||
|
|
||||||
|
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MOVDDUP_8_XPTR_INCX__X4
|
||||||
|
MOVDDUP_8_XPTR_INCX_2__X6
|
||||||
|
MOVDDUP_8_XPTR_INCx3X__X8
|
||||||
|
|
||||||
|
// X_j = { imag(y[i]), real(y[i]) }
|
||||||
|
MOVUPS (Y_PTR), X10
|
||||||
|
MOVUPS (Y_PTR)(INC_Y*1), X11
|
||||||
|
MOVUPS (Y_PTR)(INC_Y*2), X12
|
||||||
|
MOVUPS (Y_PTR)(INCx3_Y*1), X13
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
MULPD X10, X3
|
||||||
|
MULPD X11, X5
|
||||||
|
MULPD X12, X7
|
||||||
|
MULPD X13, X9
|
||||||
|
|
||||||
|
// X_j = { real(y[i]), imag(y[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10
|
||||||
|
SHUFPD $0x1, X11, X11
|
||||||
|
SHUFPD $0x1, X12, X12
|
||||||
|
SHUFPD $0x1, X13, X13
|
||||||
|
|
||||||
|
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
MULPD X10, X2
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X12, X6
|
||||||
|
MULPD X13, X8
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// psum += result[i]
|
||||||
|
ADDPD X3, SUM
|
||||||
|
ADDPD X5, P_SUM
|
||||||
|
ADDPD X7, SUM
|
||||||
|
ADDPD X9, P_SUM
|
||||||
|
|
||||||
|
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
|
||||||
|
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
ADDPD P_SUM, SUM // sum += psum
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail: // do {
|
||||||
|
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||||
|
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||||
|
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDPD X3, SUM // sum += result[i]
|
||||||
|
ADDQ INC_X, X_PTR // X_PTR += incX
|
||||||
|
ADDQ INC_Y, Y_PTR // Y_PTR += incY
|
||||||
|
DECQ TAIL // --TAIL
|
||||||
|
JNZ dot_tail // } while TAIL > 0
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
MOVUPS SUM, sum+88(FP)
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,130 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
|
||||||
|
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
|
||||||
|
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
|
||||||
|
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
|
||||||
|
|
||||||
|
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
|
||||||
|
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
|
||||||
|
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
|
||||||
|
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
|
||||||
|
|
||||||
|
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
#define IDX AX
|
||||||
|
#define I_IDX DX
|
||||||
|
|
||||||
|
// func DotuUnitary(x, y []complex128) (sum complex128)
|
||||||
|
TEXT ·DotuUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||||
|
CMPQ y_len+32(FP), LEN
|
||||||
|
CMOVQLE y_len+32(FP), LEN
|
||||||
|
PXOR SUM, SUM // SUM = 0
|
||||||
|
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
PXOR P_SUM, P_SUM // P_SUM = 0
|
||||||
|
XORQ IDX, IDX // IDX = 0
|
||||||
|
MOVQ $1, DX // j = 1
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $3, TAIL // TAIL = LEN % 4
|
||||||
|
SHRQ $2, LEN // LEN = floor( LEN / 4 )
|
||||||
|
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||||
|
|
||||||
|
dot_loop: // do {
|
||||||
|
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
MOVDDUP_16_XPTR_IDX_8__X5
|
||||||
|
MOVDDUP_32_XPTR_IDX_8__X7
|
||||||
|
MOVDDUP_48_XPTR_IDX_8__X9
|
||||||
|
|
||||||
|
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MOVDDUP_16_XPTR_IIDX_8__X4
|
||||||
|
MOVDDUP_32_XPTR_IIDX_8__X6
|
||||||
|
MOVDDUP_48_XPTR_IIDX_8__X8
|
||||||
|
|
||||||
|
// X_j = { imag(y[i]), real(y[i]) }
|
||||||
|
MOVUPS (Y_PTR)(IDX*8), X10
|
||||||
|
MOVUPS 16(Y_PTR)(IDX*8), X11
|
||||||
|
MOVUPS 32(Y_PTR)(IDX*8), X12
|
||||||
|
MOVUPS 48(Y_PTR)(IDX*8), X13
|
||||||
|
|
||||||
|
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
MULPD X10, X3
|
||||||
|
MULPD X11, X5
|
||||||
|
MULPD X12, X7
|
||||||
|
MULPD X13, X9
|
||||||
|
|
||||||
|
// X_j = { real(y[i]), imag(y[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10
|
||||||
|
SHUFPD $0x1, X11, X11
|
||||||
|
SHUFPD $0x1, X12, X12
|
||||||
|
SHUFPD $0x1, X13, X13
|
||||||
|
|
||||||
|
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
MULPD X10, X2
|
||||||
|
MULPD X11, X4
|
||||||
|
MULPD X12, X6
|
||||||
|
MULPD X13, X8
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
// psum += result[i]
|
||||||
|
ADDPD X3, SUM
|
||||||
|
ADDPD X5, P_SUM
|
||||||
|
ADDPD X7, SUM
|
||||||
|
ADDPD X9, P_SUM
|
||||||
|
|
||||||
|
ADDQ $8, IDX // IDX += 8
|
||||||
|
ADDQ $8, I_IDX // I_IDX += 8
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
ADDPD P_SUM, SUM // SUM += P_SUM
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail: // do {
|
||||||
|
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i] , real(x[i]) }
|
||||||
|
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
|
||||||
|
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
|
||||||
|
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
|
||||||
|
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
|
||||||
|
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
|
||||||
|
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDPD X3, SUM // SUM += result[i]
|
||||||
|
ADDQ $2, IDX // IDX += 2
|
||||||
|
ADDQ $2, I_IDX // I_IDX += 2
|
||||||
|
DECQ TAIL // --TAIL
|
||||||
|
JNZ dot_tail // } while TAIL > 0
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
MOVUPS SUM, sum+48(FP)
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define SRC SI
|
||||||
|
#define DST SI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define INC R9
|
||||||
|
#define INC3 R10
|
||||||
|
#define ALPHA X0
|
||||||
|
#define ALPHA_2 X1
|
||||||
|
|
||||||
|
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
|
||||||
|
|
||||||
|
// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
|
||||||
|
TEXT ·DscalInc(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+8(FP), SRC // SRC = &x
|
||||||
|
MOVQ n+32(FP), LEN // LEN = n
|
||||||
|
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||||
|
JE dscal_end
|
||||||
|
|
||||||
|
MOVDDUP_ALPHA // ALPHA = alpha
|
||||||
|
MOVQ inc+40(FP), INC // INC = inc
|
||||||
|
SHLQ $4, INC // INC = INC * sizeof(complex128)
|
||||||
|
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
|
||||||
|
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
|
||||||
|
MOVQ LEN, TAIL // TAIL = LEN
|
||||||
|
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||||
|
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
|
||||||
|
|
||||||
|
dscal_loop: // do {
|
||||||
|
MOVUPS (SRC), X2 // X_i = x[i]
|
||||||
|
MOVUPS (SRC)(INC*1), X3
|
||||||
|
MOVUPS (SRC)(INC*2), X4
|
||||||
|
MOVUPS (SRC)(INC3*1), X5
|
||||||
|
|
||||||
|
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||||
|
MULPD ALPHA_2, X3
|
||||||
|
MULPD ALPHA, X4
|
||||||
|
MULPD ALPHA_2, X5
|
||||||
|
|
||||||
|
MOVUPS X2, (DST) // x[i] = X_i
|
||||||
|
MOVUPS X3, (DST)(INC*1)
|
||||||
|
MOVUPS X4, (DST)(INC*2)
|
||||||
|
MOVUPS X5, (DST)(INC3*1)
|
||||||
|
|
||||||
|
LEAQ (SRC)(INC*4), SRC // SRC += INC*4
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dscal_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
dscal_tail:
|
||||||
|
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||||
|
JE dscal_end // if TAIL == 0 { return }
|
||||||
|
|
||||||
|
dscal_tail_loop: // do {
|
||||||
|
MOVUPS (SRC), X2 // X_i = x[i]
|
||||||
|
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||||
|
MOVUPS X2, (DST) // x[i] = X_i
|
||||||
|
ADDQ INC, SRC // SRC += INC
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ dscal_tail_loop // } while --TAIL > 0
|
||||||
|
|
||||||
|
dscal_end:
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,66 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define SRC SI
|
||||||
|
#define DST SI
|
||||||
|
#define LEN CX
|
||||||
|
#define IDX AX
|
||||||
|
#define TAIL BX
|
||||||
|
#define ALPHA X0
|
||||||
|
#define ALPHA_2 X1
|
||||||
|
|
||||||
|
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
|
||||||
|
|
||||||
|
// func DscalUnitary(alpha float64, x []complex128)
|
||||||
|
TEXT ·DscalUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+8(FP), SRC // SRC = &x
|
||||||
|
MOVQ x_len+16(FP), LEN // LEN = len(x)
|
||||||
|
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||||
|
JE dscal_end
|
||||||
|
|
||||||
|
MOVDDUP_ALPHA // ALPHA = alpha
|
||||||
|
XORQ IDX, IDX // IDX = 0
|
||||||
|
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
|
||||||
|
MOVQ LEN, TAIL // TAIL = LEN
|
||||||
|
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||||
|
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
|
||||||
|
|
||||||
|
dscal_loop: // do {
|
||||||
|
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
|
||||||
|
MOVUPS 16(SRC)(IDX*8), X3
|
||||||
|
MOVUPS 32(SRC)(IDX*8), X4
|
||||||
|
MOVUPS 48(SRC)(IDX*8), X5
|
||||||
|
|
||||||
|
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||||
|
MULPD ALPHA_2, X3
|
||||||
|
MULPD ALPHA, X4
|
||||||
|
MULPD ALPHA_2, X5
|
||||||
|
|
||||||
|
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
|
||||||
|
MOVUPS X3, 16(DST)(IDX*8)
|
||||||
|
MOVUPS X4, 32(DST)(IDX*8)
|
||||||
|
MOVUPS X5, 48(DST)(IDX*8)
|
||||||
|
|
||||||
|
ADDQ $8, IDX // IDX += 8
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dscal_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
dscal_tail:
|
||||||
|
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||||
|
JZ dscal_end // if TAIL == 0 { return }
|
||||||
|
|
||||||
|
dscal_tail_loop: // do {
|
||||||
|
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
|
||||||
|
MULPD ALPHA, X2 // X_i *= ALPHA
|
||||||
|
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
|
||||||
|
ADDQ $2, IDX // IDX += 2
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ dscal_tail_loop // } while --TAIL > 0
|
||||||
|
|
||||||
|
dscal_end:
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package c128
|
||||||
|
|
||||||
|
// ScalUnitaryTo stores alpha*x[i] into dst[i] for every index i of x.
// It is equivalent to
//  for i, v := range x {
//  	dst[i] = alpha * v
//  }
// dst is indexed at every position of x, so it must be at least as long as x.
func ScalUnitaryTo(dst []complex128, alpha complex128, x []complex128) {
	for i := range x {
		dst[i] = alpha * x[i]
	}
}
|
||||||
|
|
||||||
|
// ScalIncTo writes n scaled elements of x into dst, reading x with stride
// incX and writing dst with stride incDst, both starting at index 0.
// It is equivalent to
//  var idst, ix uintptr
//  for i := 0; i < int(n); i++ {
//  	dst[idst] = alpha * x[ix]
//  	ix += incX
//  	idst += incDst
//  }
func ScalIncTo(dst []complex128, incDst uintptr, alpha complex128, x []complex128, n, incX uintptr) {
	count := int(n)
	for k, src, out := 0, uintptr(0), uintptr(0); k < count; k, src, out = k+1, src+incX, out+incDst {
		dst[out] = alpha * x[src]
	}
}
|
||||||
|
|
@ -0,0 +1,116 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define SRC SI
|
||||||
|
#define DST SI
|
||||||
|
#define LEN CX
|
||||||
|
#define IDX AX
|
||||||
|
#define TAIL BX
|
||||||
|
#define ALPHA X0
|
||||||
|
#define ALPHA_C X1
|
||||||
|
#define ALPHA2 X10
|
||||||
|
#define ALPHA_C2 X11
|
||||||
|
|
||||||
|
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
|
||||||
|
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
|
||||||
|
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
|
||||||
|
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
|
||||||
|
|
||||||
|
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||||
|
|
||||||
|
// func ScalUnitary(alpha complex128, x []complex128)
|
||||||
|
TEXT ·ScalUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+16(FP), SRC // SRC = &x
|
||||||
|
MOVQ x_len+24(FP), LEN // LEN = len(x)
|
||||||
|
CMPQ LEN, $0 // if LEN == 0 { return }
|
||||||
|
JE scal_end
|
||||||
|
|
||||||
|
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
|
||||||
|
MOVAPS ALPHA, ALPHA_C
|
||||||
|
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
|
||||||
|
|
||||||
|
XORQ IDX, IDX // IDX = 0
|
||||||
|
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
|
||||||
|
MOVAPS ALPHA_C, ALPHA_C2
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||||
|
JZ scal_tail // if LEN == 0 { goto scal_tail }
|
||||||
|
|
||||||
|
scal_loop: // do {
|
||||||
|
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVUPS 16(SRC)(IDX*8), X4
|
||||||
|
MOVUPS 32(SRC)(IDX*8), X6
|
||||||
|
MOVUPS 48(SRC)(IDX*8), X8
|
||||||
|
|
||||||
|
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3
|
||||||
|
MOVDDUP_X4_X5
|
||||||
|
MOVDDUP_X6_X7
|
||||||
|
MOVDDUP_X8_X9
|
||||||
|
|
||||||
|
// X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2
|
||||||
|
SHUFPD $0x3, X4, X4
|
||||||
|
SHUFPD $0x3, X6, X6
|
||||||
|
SHUFPD $0x3, X8, X8
|
||||||
|
|
||||||
|
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||||
|
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||||
|
MULPD ALPHA_C, X2
|
||||||
|
MULPD ALPHA, X3
|
||||||
|
MULPD ALPHA_C2, X4
|
||||||
|
MULPD ALPHA2, X5
|
||||||
|
MULPD ALPHA_C, X6
|
||||||
|
MULPD ALPHA, X7
|
||||||
|
MULPD ALPHA_C2, X8
|
||||||
|
MULPD ALPHA2, X9
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||||
|
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
|
||||||
|
MOVUPS X5, 16(DST)(IDX*8)
|
||||||
|
MOVUPS X7, 32(DST)(IDX*8)
|
||||||
|
MOVUPS X9, 48(DST)(IDX*8)
|
||||||
|
ADDQ $8, IDX // IDX += 8
|
||||||
|
DECQ LEN
|
||||||
|
JNZ scal_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
scal_tail:
|
||||||
|
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||||
|
JZ scal_end // if TAIL == 0 { return }
|
||||||
|
|
||||||
|
scal_tail_loop: // do {
|
||||||
|
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||||
|
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||||
|
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
|
||||||
|
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
|
||||||
|
ADDQ $2, IDX // IDX += 2
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ scal_tail_loop // } while --TAIL > 0
|
||||||
|
|
||||||
|
scal_end:
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,121 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define SRC SI
|
||||||
|
#define DST SI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define INC R9
|
||||||
|
#define INC3 R10
|
||||||
|
#define ALPHA X0
|
||||||
|
#define ALPHA_C X1
|
||||||
|
#define ALPHA2 X10
|
||||||
|
#define ALPHA_C2 X11
|
||||||
|
|
||||||
|
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
|
||||||
|
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
|
||||||
|
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
|
||||||
|
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
|
||||||
|
|
||||||
|
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
|
||||||
|
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
|
||||||
|
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
|
||||||
|
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
|
||||||
|
|
||||||
|
// func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
|
||||||
|
TEXT ·ScalInc(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+16(FP), SRC // SRC = &x
|
||||||
|
MOVQ n+40(FP), LEN // LEN = n
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE scal_end // if LEN == 0 { return }
|
||||||
|
|
||||||
|
MOVQ inc+48(FP), INC // INC = inc
|
||||||
|
SHLQ $4, INC // INC = INC * sizeof(complex128)
|
||||||
|
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
|
||||||
|
|
||||||
|
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
|
||||||
|
MOVAPS ALPHA, ALPHA_C
|
||||||
|
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
|
||||||
|
|
||||||
|
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
|
||||||
|
MOVAPS ALPHA_C, ALPHA_C2
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
SHRQ $2, LEN // LEN = floor( n / 4 )
|
||||||
|
JZ scal_tail // if LEN == 0 { goto scal_tail }
|
||||||
|
|
||||||
|
scal_loop: // do {
|
||||||
|
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVUPS (SRC)(INC*1), X4
|
||||||
|
MOVUPS (SRC)(INC*2), X6
|
||||||
|
MOVUPS (SRC)(INC3*1), X8
|
||||||
|
|
||||||
|
// X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3
|
||||||
|
MOVDDUP_X4_X5
|
||||||
|
MOVDDUP_X6_X7
|
||||||
|
MOVDDUP_X8_X9
|
||||||
|
|
||||||
|
// X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2
|
||||||
|
SHUFPD $0x3, X4, X4
|
||||||
|
SHUFPD $0x3, X6, X6
|
||||||
|
SHUFPD $0x3, X8, X8
|
||||||
|
|
||||||
|
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||||
|
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||||
|
MULPD ALPHA_C, X2
|
||||||
|
MULPD ALPHA, X3
|
||||||
|
MULPD ALPHA_C2, X4
|
||||||
|
MULPD ALPHA2, X5
|
||||||
|
MULPD ALPHA_C, X6
|
||||||
|
MULPD ALPHA, X7
|
||||||
|
MULPD ALPHA_C2, X8
|
||||||
|
MULPD ALPHA2, X9
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||||
|
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
ADDSUBPD_X4_X5
|
||||||
|
ADDSUBPD_X6_X7
|
||||||
|
ADDSUBPD_X8_X9
|
||||||
|
|
||||||
|
MOVUPS X3, (DST) // x[i] = X_(i+1)
|
||||||
|
MOVUPS X5, (DST)(INC*1)
|
||||||
|
MOVUPS X7, (DST)(INC*2)
|
||||||
|
MOVUPS X9, (DST)(INC3*1)
|
||||||
|
|
||||||
|
LEAQ (SRC)(INC*4), SRC // SRC = &(SRC[inc*4])
|
||||||
|
DECQ LEN
|
||||||
|
JNZ scal_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
scal_tail:
|
||||||
|
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||||
|
JE scal_end // if TAIL == 0 { return }
|
||||||
|
|
||||||
|
scal_tail_loop: // do {
|
||||||
|
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
|
||||||
|
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
|
||||||
|
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
|
||||||
|
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
|
||||||
|
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
|
||||||
|
|
||||||
|
// X_(i+1) = {
|
||||||
|
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
|
||||||
|
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
|
||||||
|
// }
|
||||||
|
ADDSUBPD_X2_X3
|
||||||
|
|
||||||
|
MOVUPS X3, (DST) // x[i] = X_(i+1)
|
||||||
|
ADDQ INC, SRC // SRC = &(SRC[inc])
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ scal_tail_loop // } while --TAIL > 0
|
||||||
|
|
||||||
|
scal_end:
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,96 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
package c128
|
||||||
|
|
||||||
|
// AxpyUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// y[i] += alpha * v
|
||||||
|
// }
|
||||||
|
func AxpyUnitary(alpha complex128, x, y []complex128)
|
||||||
|
|
||||||
|
// AxpyUnitaryTo is
|
||||||
|
// for i, v := range x {
|
||||||
|
// dst[i] = alpha*v + y[i]
|
||||||
|
// }
|
||||||
|
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
|
||||||
|
|
||||||
|
// AxpyInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// y[iy] += alpha * x[ix]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||||
|
|
||||||
|
// AxpyIncTo is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// dst[idst] = alpha*x[ix] + y[iy]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// idst += incDst
|
||||||
|
// }
|
||||||
|
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
|
||||||
|
|
||||||
|
// DscalUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
|
||||||
|
// }
|
||||||
|
func DscalUnitary(alpha float64, x []complex128)
|
||||||
|
|
||||||
|
// DscalInc is
|
||||||
|
// var ix uintptr
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
|
||||||
|
// ix += inc
|
||||||
|
// }
|
||||||
|
func DscalInc(alpha float64, x []complex128, n, inc uintptr)
|
||||||
|
|
||||||
|
// ScalInc is
|
||||||
|
// var ix uintptr
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// x[ix] *= alpha
|
||||||
|
// ix += inc
|
||||||
|
// }
|
||||||
|
func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
|
||||||
|
|
||||||
|
// ScalUnitary is
|
||||||
|
// for i := range x {
|
||||||
|
// x[i] *= alpha
|
||||||
|
// }
|
||||||
|
func ScalUnitary(alpha complex128, x []complex128)
|
||||||
|
|
||||||
|
// DotcUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += y[i] * cmplx.Conj(v)
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotcUnitary(x, y []complex128) (sum complex128)
|
||||||
|
|
||||||
|
// DotcInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += y[iy] * cmplx.Conj(x[ix])
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||||
|
|
||||||
|
// DotuUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += y[i] * v
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotuUnitary(x, y []complex128) (sum complex128)
|
||||||
|
|
||||||
|
// DotuInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += y[iy] * x[ix]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
|
||||||
|
|
@ -0,0 +1,163 @@
|
||||||
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !amd64 noasm appengine
|
||||||
|
|
||||||
|
package c128
|
||||||
|
|
||||||
|
import "math/cmplx"
|
||||||
|
|
||||||
|
// AxpyUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// y[i] += alpha * v
|
||||||
|
// }
|
||||||
|
func AxpyUnitary(alpha complex128, x, y []complex128) {
|
||||||
|
for i, v := range x {
|
||||||
|
y[i] += alpha * v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AxpyUnitaryTo is
|
||||||
|
// for i, v := range x {
|
||||||
|
// dst[i] = alpha*v + y[i]
|
||||||
|
// }
|
||||||
|
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) {
|
||||||
|
for i, v := range x {
|
||||||
|
dst[i] = alpha*v + y[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AxpyInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// y[iy] += alpha * x[ix]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
y[iy] += alpha * x[ix]
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AxpyIncTo is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// dst[idst] = alpha*x[ix] + y[iy]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// idst += incDst
|
||||||
|
// }
|
||||||
|
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
dst[idst] = alpha*x[ix] + y[iy]
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
idst += incDst
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DscalUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
|
||||||
|
// }
|
||||||
|
func DscalUnitary(alpha float64, x []complex128) {
|
||||||
|
for i, v := range x {
|
||||||
|
x[i] = complex(real(v)*alpha, imag(v)*alpha)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DscalInc is
|
||||||
|
// var ix uintptr
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
|
||||||
|
// ix += inc
|
||||||
|
// }
|
||||||
|
func DscalInc(alpha float64, x []complex128, n, inc uintptr) {
|
||||||
|
var ix uintptr
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
|
||||||
|
ix += inc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ScalInc is
|
||||||
|
// var ix uintptr
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// x[ix] *= alpha
|
||||||
|
// ix += inc
|
||||||
|
// }
|
||||||
|
func ScalInc(alpha complex128, x []complex128, n, inc uintptr) {
|
||||||
|
var ix uintptr
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
x[ix] *= alpha
|
||||||
|
ix += inc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ScalUnitary is
|
||||||
|
// for i := range x {
|
||||||
|
// x[i] *= alpha
|
||||||
|
// }
|
||||||
|
func ScalUnitary(alpha complex128, x []complex128) {
|
||||||
|
for i := range x {
|
||||||
|
x[i] *= alpha
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DotcUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += y[i] * cmplx.Conj(v)
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotcUnitary(x, y []complex128) (sum complex128) {
|
||||||
|
for i, v := range x {
|
||||||
|
sum += y[i] * cmplx.Conj(v)
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
// DotcInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += y[iy] * cmplx.Conj(x[ix])
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
sum += y[iy] * cmplx.Conj(x[ix])
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
// DotuUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += y[i] * v
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotuUnitary(x, y []complex128) (sum complex128) {
|
||||||
|
for i, v := range x {
|
||||||
|
sum += y[i] * v
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
// DotuInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += y[iy] * x[ix]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
sum += y[iy] * x[ix]
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define INC_X R8
|
||||||
|
#define INCx3_X R10
|
||||||
|
#define INC_Y R9
|
||||||
|
#define INCx3_Y R11
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
|
||||||
|
// func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
|
||||||
|
TEXT ·DdotInc(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
MOVQ n+48(FP), LEN // LEN = n
|
||||||
|
PXOR SUM, SUM // SUM = 0
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
MOVQ ix+72(FP), INC_X // INC_X = ix
|
||||||
|
MOVQ iy+80(FP), INC_Y // INC_Y = iy
|
||||||
|
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
|
||||||
|
|
||||||
|
MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(float32)
|
||||||
|
SHLQ $2, INC_X
|
||||||
|
MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(float32)
|
||||||
|
SHLQ $2, INC_Y
|
||||||
|
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $3, TAIL // TAIL = LEN % 4
|
||||||
|
SHRQ $2, LEN // LEN = floor( LEN / 4 )
|
||||||
|
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||||
|
|
||||||
|
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||||
|
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
|
||||||
|
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
|
||||||
|
|
||||||
|
dot_loop: // Loop unrolled 4x do {
|
||||||
|
CVTSS2SD (X_PTR), X2 // X_i = x[i:i+1]
|
||||||
|
CVTSS2SD (X_PTR)(INC_X*1), X3
|
||||||
|
CVTSS2SD (X_PTR)(INC_X*2), X4
|
||||||
|
CVTSS2SD (X_PTR)(INCx3_X*1), X5
|
||||||
|
|
||||||
|
CVTSS2SD (Y_PTR), X6 // X_j = y[i:i+1]
|
||||||
|
CVTSS2SD (Y_PTR)(INC_Y*1), X7
|
||||||
|
CVTSS2SD (Y_PTR)(INC_Y*2), X8
|
||||||
|
CVTSS2SD (Y_PTR)(INCx3_Y*1), X9
|
||||||
|
|
||||||
|
MULSD X6, X2 // X_i *= X_j
|
||||||
|
MULSD X7, X3
|
||||||
|
MULSD X8, X4
|
||||||
|
MULSD X9, X5
|
||||||
|
|
||||||
|
ADDSD X2, SUM // SUM += X_i
|
||||||
|
ADDSD X3, P_SUM
|
||||||
|
ADDSD X4, SUM
|
||||||
|
ADDSD X5, P_SUM
|
||||||
|
|
||||||
|
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
|
||||||
|
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
ADDSD P_SUM, SUM // SUM += P_SUM
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail: // do {
|
||||||
|
CVTSS2SD (X_PTR), X2 // X2 = x[i]
|
||||||
|
CVTSS2SD (Y_PTR), X3 // X3 = y[i]
|
||||||
|
MULSD X3, X2 // X2 *= X3
|
||||||
|
ADDSD X2, SUM // SUM += X2
|
||||||
|
ADDQ INC_X, X_PTR // X_PTR += INC_X
|
||||||
|
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ dot_tail // } while --TAIL > 0
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
MOVSD SUM, sum+88(FP) // return SUM
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define HADDPD_SUM_SUM LONG $0xC07C0F66 // @ HADDPD X0, X0
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define IDX AX
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
|
||||||
|
// func DdotUnitary(x, y []float32) (sum float64)
|
||||||
|
TEXT ·DdotUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||||
|
CMPQ y_len+32(FP), LEN
|
||||||
|
CMOVQLE y_len+32(FP), LEN
|
||||||
|
PXOR SUM, SUM // SUM = 0
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
XORQ IDX, IDX
|
||||||
|
MOVQ Y_PTR, DX
|
||||||
|
ANDQ $0xF, DX // DX = Y_PTR % 16 (offset of y from a 16-byte boundary)
|
||||||
|
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
|
||||||
|
|
||||||
|
SUBQ $16, DX
|
||||||
|
|
||||||
|
dot_align: // Trim first value(s) in unaligned buffer do {
|
||||||
|
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
|
||||||
|
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
|
||||||
|
MULSD X3, X2
|
||||||
|
ADDSD X2, SUM // SUM += X2
|
||||||
|
INCQ IDX // IDX++
|
||||||
|
DECQ LEN
|
||||||
|
JZ dot_end // if --LEN == 0 { return }
|
||||||
|
ADDQ $4, DX
|
||||||
|
JNZ dot_align // } while (DX += 4) != 0
|
||||||
|
|
||||||
|
dot_no_trim:
|
||||||
|
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $0x7, TAIL // TAIL = LEN % 8
|
||||||
|
SHRQ $3, LEN // LEN = floor( LEN / 8 )
|
||||||
|
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
|
||||||
|
|
||||||
|
dot_loop: // Loop unrolled 8x do {
|
||||||
|
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||||
|
CVTPS2PD 8(X_PTR)(IDX*4), X3
|
||||||
|
CVTPS2PD 16(X_PTR)(IDX*4), X4
|
||||||
|
CVTPS2PD 24(X_PTR)(IDX*4), X5
|
||||||
|
|
||||||
|
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
|
||||||
|
CVTPS2PD 8(Y_PTR)(IDX*4), X7
|
||||||
|
CVTPS2PD 16(Y_PTR)(IDX*4), X8
|
||||||
|
CVTPS2PD 24(Y_PTR)(IDX*4), X9
|
||||||
|
|
||||||
|
MULPD X6, X2 // X_i *= X_j
|
||||||
|
MULPD X7, X3
|
||||||
|
MULPD X8, X4
|
||||||
|
MULPD X9, X5
|
||||||
|
|
||||||
|
ADDPD X2, SUM // SUM += X_i
|
||||||
|
ADDPD X3, P_SUM
|
||||||
|
ADDPD X4, SUM
|
||||||
|
ADDPD X5, P_SUM
|
||||||
|
|
||||||
|
ADDQ $8, IDX // IDX += 8
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
ADDPD P_SUM, SUM // SUM += P_SUM
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail_start:
|
||||||
|
MOVQ TAIL, LEN
|
||||||
|
SHRQ $1, LEN
|
||||||
|
JZ dot_tail_one
|
||||||
|
|
||||||
|
dot_tail_two:
|
||||||
|
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||||
|
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
|
||||||
|
MULPD X6, X2 // X_i *= X_j
|
||||||
|
ADDPD X2, SUM // SUM += X_i
|
||||||
|
ADDQ $2, IDX // IDX += 2
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_tail_two // } while --LEN > 0
|
||||||
|
|
||||||
|
ANDQ $1, TAIL
|
||||||
|
JZ dot_end
|
||||||
|
|
||||||
|
dot_tail_one:
|
||||||
|
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
|
||||||
|
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
|
||||||
|
MULSD X3, X2 // X2 *= X3
|
||||||
|
ADDSD X2, SUM // SUM += X2
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
HADDPD_SUM_SUM // SUM = \sum{ SUM[i] }
|
||||||
|
MOVSD SUM, sum+48(FP) // return SUM
|
||||||
|
RET
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
// Copyright ©2017 The gonum Authors. All rights reserved.
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
// Package f32 provides float32 vector primitives.
|
// Package f32 provides float32 vector primitives.
|
||||||
package f32
|
package f32 // import "gonum.org/v1/gonum/internal/asm/f32"
|
||||||
|
|
@ -0,0 +1,85 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define INC_X R8
|
||||||
|
#define INCx3_X R10
|
||||||
|
#define INC_Y R9
|
||||||
|
#define INCx3_Y R11
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
|
||||||
|
// func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
|
||||||
|
TEXT ·DotInc(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
PXOR SUM, SUM // SUM = 0
|
||||||
|
MOVQ n+48(FP), LEN // LEN = n
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
MOVQ ix+72(FP), INC_X // INC_X = ix
|
||||||
|
MOVQ iy+80(FP), INC_Y // INC_Y = iy
|
||||||
|
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
|
||||||
|
|
||||||
|
MOVQ incX+56(FP), INC_X // INC_X := incX * sizeof(float32)
|
||||||
|
SHLQ $2, INC_X
|
||||||
|
MOVQ incY+64(FP), INC_Y // INC_Y := incY * sizeof(float32)
|
||||||
|
SHLQ $2, INC_Y
|
||||||
|
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $0x3, TAIL // TAIL = LEN % 4
|
||||||
|
SHRQ $2, LEN // LEN = floor( LEN / 4 )
|
||||||
|
JZ dot_tail // if LEN == 0 { goto dot_tail }
|
||||||
|
|
||||||
|
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||||
|
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
|
||||||
|
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
|
||||||
|
|
||||||
|
dot_loop: // Loop unrolled 4x do {
|
||||||
|
MOVSS (X_PTR), X2 // X_i = x[i:i+1]
|
||||||
|
MOVSS (X_PTR)(INC_X*1), X3
|
||||||
|
MOVSS (X_PTR)(INC_X*2), X4
|
||||||
|
MOVSS (X_PTR)(INCx3_X*1), X5
|
||||||
|
|
||||||
|
MULSS (Y_PTR), X2 // X_i *= y[i:i+1]
|
||||||
|
MULSS (Y_PTR)(INC_Y*1), X3
|
||||||
|
MULSS (Y_PTR)(INC_Y*2), X4
|
||||||
|
MULSS (Y_PTR)(INCx3_Y*1), X5
|
||||||
|
|
||||||
|
ADDSS X2, SUM // SUM += X_i
|
||||||
|
ADDSS X3, P_SUM
|
||||||
|
ADDSS X4, SUM
|
||||||
|
ADDSS X5, P_SUM
|
||||||
|
|
||||||
|
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
|
||||||
|
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
|
||||||
|
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
ADDSS P_SUM, SUM // SUM += P_SUM
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail: // do {
|
||||||
|
MOVSS (X_PTR), X2 // X2 = x[i]
|
||||||
|
MULSS (Y_PTR), X2 // X2 *= y[i]
|
||||||
|
ADDSS X2, SUM // SUM += X2
|
||||||
|
ADDQ INC_X, X_PTR // X_PTR += INC_X
|
||||||
|
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ dot_tail // } while --TAIL > 0
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
MOVSS SUM, sum+88(FP) // return SUM
|
||||||
|
RET
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//+build !noasm,!appengine
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define HADDPS_SUM_SUM LONG $0xC07C0FF2 // @ HADDPS X0, X0
|
||||||
|
|
||||||
|
#define X_PTR SI
|
||||||
|
#define Y_PTR DI
|
||||||
|
#define LEN CX
|
||||||
|
#define TAIL BX
|
||||||
|
#define IDX AX
|
||||||
|
#define SUM X0
|
||||||
|
#define P_SUM X1
|
||||||
|
|
||||||
|
// func DotUnitary(x, y []float32) (sum float32)
|
||||||
|
TEXT ·DotUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
|
||||||
|
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
|
||||||
|
PXOR SUM, SUM // SUM = 0
|
||||||
|
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
|
||||||
|
CMPQ y_len+32(FP), LEN
|
||||||
|
CMOVQLE y_len+32(FP), LEN
|
||||||
|
CMPQ LEN, $0
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
XORQ IDX, IDX
|
||||||
|
MOVQ Y_PTR, DX
|
||||||
|
ANDQ $0xF, DX // Align on 16-byte boundary for MULPS
|
||||||
|
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
|
||||||
|
SUBQ $16, DX
|
||||||
|
|
||||||
|
dot_align: // Trim first value(s) in unaligned buffer do {
|
||||||
|
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
|
||||||
|
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
|
||||||
|
ADDSS X2, SUM // SUM += X2
|
||||||
|
INCQ IDX // IDX++
|
||||||
|
DECQ LEN
|
||||||
|
JZ dot_end // if --LEN == 0 { return }
|
||||||
|
ADDQ $4, DX
|
||||||
|
JNZ dot_align // } while (DX += 4) != 0
|
||||||
|
|
||||||
|
dot_no_trim:
|
||||||
|
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
|
||||||
|
MOVQ LEN, TAIL
|
||||||
|
ANDQ $0xF, TAIL // TAIL = LEN % 16
|
||||||
|
SHRQ $4, LEN // LEN = floor( LEN / 16 )
|
||||||
|
JZ dot_tail4_start // if LEN == 0 { goto dot_tail4_start }
|
||||||
|
|
||||||
|
dot_loop: // Loop unrolled 16x do {
|
||||||
|
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||||
|
MOVUPS 16(X_PTR)(IDX*4), X3
|
||||||
|
MOVUPS 32(X_PTR)(IDX*4), X4
|
||||||
|
MOVUPS 48(X_PTR)(IDX*4), X5
|
||||||
|
|
||||||
|
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
|
||||||
|
MULPS 16(Y_PTR)(IDX*4), X3
|
||||||
|
MULPS 32(Y_PTR)(IDX*4), X4
|
||||||
|
MULPS 48(Y_PTR)(IDX*4), X5
|
||||||
|
|
||||||
|
ADDPS X2, SUM // SUM += X_i
|
||||||
|
ADDPS X3, P_SUM
|
||||||
|
ADDPS X4, SUM
|
||||||
|
ADDPS X5, P_SUM
|
||||||
|
|
||||||
|
ADDQ $16, IDX // IDX += 16
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
ADDPS P_SUM, SUM // SUM += P_SUM
|
||||||
|
CMPQ TAIL, $0 // if TAIL == 0 { return }
|
||||||
|
JE dot_end
|
||||||
|
|
||||||
|
dot_tail4_start: // Reset loop counter for 4-wide tail loop
|
||||||
|
MOVQ TAIL, LEN // LEN = floor( TAIL / 4 )
|
||||||
|
SHRQ $2, LEN
|
||||||
|
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
|
||||||
|
|
||||||
|
dot_tail4_loop: // Loop unrolled 4x do {
|
||||||
|
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
|
||||||
|
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
|
||||||
|
ADDPS X2, SUM // SUM += X_i
|
||||||
|
ADDQ $4, IDX // IDX += 4
|
||||||
|
DECQ LEN
|
||||||
|
JNZ dot_tail4_loop // } while --LEN > 0
|
||||||
|
|
||||||
|
dot_tail_start: // Reset loop counter for 1-wide tail loop
|
||||||
|
ANDQ $3, TAIL // TAIL = TAIL % 4
|
||||||
|
JZ dot_end // if TAIL == 0 { return }
|
||||||
|
|
||||||
|
dot_tail: // do {
|
||||||
|
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
|
||||||
|
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
|
||||||
|
ADDSS X2, SUM // SUM += X2
|
||||||
|
INCQ IDX // IDX++
|
||||||
|
DECQ TAIL
|
||||||
|
JNZ dot_tail // } while --TAIL > 0
|
||||||
|
|
||||||
|
dot_end:
|
||||||
|
HADDPS_SUM_SUM // SUM = \sum{ SUM[i] }
|
||||||
|
HADDPS_SUM_SUM
|
||||||
|
MOVSS SUM, sum+48(FP) // return SUM
|
||||||
|
RET
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -34,3 +34,35 @@ func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
|
||||||
// idst += incDst
|
// idst += incDst
|
||||||
// }
|
// }
|
||||||
func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
|
func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
|
||||||
|
|
||||||
|
// DdotUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += float64(y[i]) * float64(v)
|
||||||
|
// }
|
||||||
|
// return
|
||||||
|
func DdotUnitary(x, y []float32) (sum float64)
|
||||||
|
|
||||||
|
// DdotInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += float64(y[iy]) * float64(x[ix])
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return
|
||||||
|
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
|
||||||
|
|
||||||
|
// DotUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += y[i] * v
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotUnitary(x, y []float32) (sum float32)
|
||||||
|
|
||||||
|
// DotInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += y[iy] * x[ix]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -55,3 +55,59 @@ func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float3
|
||||||
idst += incDst
|
idst += incDst
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DotUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += y[i] * v
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotUnitary(x, y []float32) (sum float32) {
|
||||||
|
for i, v := range x {
|
||||||
|
sum += y[i] * v
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
// DotInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += y[iy] * x[ix]
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return sum
|
||||||
|
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
sum += y[iy] * x[ix]
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
}
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
// DdotUnitary is
|
||||||
|
// for i, v := range x {
|
||||||
|
// sum += float64(y[i]) * float64(v)
|
||||||
|
// }
|
||||||
|
// return
|
||||||
|
func DdotUnitary(x, y []float32) (sum float64) {
|
||||||
|
for i, v := range x {
|
||||||
|
sum += float64(y[i]) * float64(v)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// DdotInc is
|
||||||
|
// for i := 0; i < int(n); i++ {
|
||||||
|
// sum += float64(y[iy]) * float64(x[ix])
|
||||||
|
// ix += incX
|
||||||
|
// iy += incY
|
||||||
|
// }
|
||||||
|
// return
|
||||||
|
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
|
||||||
|
for i := 0; i < int(n); i++ {
|
||||||
|
sum += float64(y[iy]) * float64(x[ix])
|
||||||
|
ix += incX
|
||||||
|
iy += incY
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
// Copyright ©2017 The gonum Authors. All rights reserved.
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
// Package f64 provides float64 vector primitives.
|
// Package f64 provides float64 vector primitives.
|
||||||
package f64
|
package f64 // import "gonum.org/v1/gonum/internal/asm/f64"
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
//
|
//
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright ©2016 The gonum Authors. All rights reserved.
|
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue