dbtester/vendor/github.com/gyuho/dataframe/dataframe.go

427 lines
8.6 KiB
Go

package dataframe
import (
"encoding/csv"
"fmt"
"os"
"sync"
)
// Frame contains data.
type Frame interface {
// Headers returns the slice of headers in order. Header name is unique among its Frame.
Headers() []string
// AddColumn adds a Column to Frame.
AddColumn(c Column) error
// Column returns the Column by its header name.
Column(header string) (Column, error)
// Columns returns all Columns.
Columns() []Column
// CountColumn returns the number of Columns in the Frame.
CountColumn() int
// UpdateHeader updates the header name of a Column.
UpdateHeader(origHeader, newHeader string) error
// MoveColumn moves the column right before the target index.
MoveColumn(header string, target int) error
// DeleteColumn deletes the Column by its header.
DeleteColumn(header string) bool
// CSV saves the Frame to a CSV file.
CSV(fpath string) error
// Rows returns the header and data slices.
Rows() ([]string, [][]string)
// Sort sorts the Frame.
Sort(header string, st SortType, so SortOption) error
}
type frame struct {
mu sync.Mutex
columns []Column
headerTo map[string]int
}
// New returns a new Frame.
func New() Frame {
return &frame{
columns: []Column{},
headerTo: make(map[string]int),
}
}
// NewFromRows creates Frame from rows.
func NewFromRows(header []string, rows [][]string) (Frame, error) {
if len(rows) < 1 {
return nil, fmt.Errorf("empty row %q", rows)
}
fr := New()
headerN := len(header)
if headerN > 0 { // use this as header
// assume no header string at top
cols := make([]Column, headerN)
for i := range cols {
cols[i] = NewColumn(header[i])
}
for _, row := range rows {
rowN := len(row)
if rowN > headerN {
return nil, fmt.Errorf("header %q is not specified correctly for %q", header, row)
}
for j, v := range row {
cols[j].PushBack(NewStringValue(v))
}
if rowN < headerN { // fill in empty values
for k := rowN; k < headerN; k++ {
cols[k].PushBack(NewStringValue(""))
}
}
}
for _, c := range cols {
if err := fr.AddColumn(c); err != nil {
return nil, err
}
}
return fr, nil
}
// use first row as header
// assume header string at top
header = rows[0]
headerN = len(header)
cols := make([]Column, headerN)
for i := range cols {
cols[i] = NewColumn(header[i])
}
for i, row := range rows {
if i == 0 {
continue
}
rowN := len(row)
if rowN > headerN {
return nil, fmt.Errorf("header %q is not specified correctly for %q", header, row)
}
for j, v := range row {
cols[j].PushBack(NewStringValue(v))
}
if rowN < headerN { // fill in empty values
for k := rowN; k < headerN; k++ {
cols[k].PushBack(NewStringValue(""))
}
}
}
for _, c := range cols {
if err := fr.AddColumn(c); err != nil {
return nil, err
}
}
return fr, nil
}
// NewFromCSV creates a new Frame from CSV.
func NewFromCSV(header []string, fpath string) (Frame, error) {
f, err := os.OpenFile(fpath, os.O_RDONLY, 0444)
if err != nil {
return nil, err
}
defer f.Close()
rd := csv.NewReader(f)
// TODO: make this configurable
rd.FieldsPerRecord = -1
rows, err := rd.ReadAll()
if err != nil {
return nil, err
}
return NewFromRows(header, rows)
}
func (f *frame) Headers() []string {
f.mu.Lock()
defer f.mu.Unlock()
rs := make([]string, len(f.headerTo))
for k, v := range f.headerTo {
rs[v] = k
}
return rs
}
func (f *frame) AddColumn(c Column) error {
f.mu.Lock()
defer f.mu.Unlock()
header := c.Header()
if _, ok := f.headerTo[header]; ok {
return fmt.Errorf("%q already exists", header)
}
f.columns = append(f.columns, c)
f.headerTo[header] = len(f.columns) - 1
return nil
}
func (f *frame) Column(header string) (Column, error) {
f.mu.Lock()
defer f.mu.Unlock()
idx, ok := f.headerTo[header]
if !ok {
return nil, fmt.Errorf("%q does not exist", header)
}
return f.columns[idx], nil
}
func (f *frame) Columns() []Column {
f.mu.Lock()
defer f.mu.Unlock()
return f.columns
}
func (f *frame) CountColumn() int {
f.mu.Lock()
defer f.mu.Unlock()
return len(f.columns)
}
func (f *frame) UpdateHeader(origHeader, newHeader string) error {
f.mu.Lock()
defer f.mu.Unlock()
idx, ok := f.headerTo[origHeader]
if !ok {
return fmt.Errorf("%q does not exist", origHeader)
}
if _, ok := f.headerTo[newHeader]; ok {
return fmt.Errorf("%q already exists", newHeader)
}
f.columns[idx].UpdateHeader(newHeader)
f.headerTo[newHeader] = idx
delete(f.headerTo, origHeader)
return nil
}
func (f *frame) MoveColumn(header string, target int) error {
f.mu.Lock()
defer f.mu.Unlock()
if target < 0 || target > len(f.headerTo) {
return fmt.Errorf("%d is out of range", target)
}
oldi, ok := f.headerTo[header]
if !ok {
return fmt.Errorf("%q does not exist", header)
}
if target == oldi {
// no need to insert
return nil
}
var copied []Column
switch {
case target < oldi: // move somewhere to left
// e.g. arr1, oldi 7, target 2
// 0 1 | 2 3 4 5 6 [7] 8 9
// 1. copy[:2]
// 2. arr2[2] = arr1[7]
// 3. copy[3:7]
// 4. copy[8:]
copied = make([]Column, target)
if target == 0 {
copied = []Column{}
} else {
copy(copied, f.columns[:target])
}
copied = append(copied, f.columns[oldi])
// at this point, moved until 'target' index
for i, c := range f.columns {
if i < target || i == oldi { // already moved
continue
}
copied = append(copied, c)
}
case oldi < target: // move somewhere to right
// e.g. arr1, oldi 2, target 8
// 0 1 [2] 3 4 5 6 7 | 8 9
// 1. copy[:2]
// 2. copy[3:8]
// 3. arr2[7] = arr1[2]
// 4. copy[8:]
copied = make([]Column, oldi)
if oldi == 0 {
copied = []Column{}
} else {
copy(copied, f.columns[:oldi])
}
copied = append(copied, f.columns[oldi+1:target]...)
for i, c := range f.columns {
if i != oldi && i < target { // already moved
continue
}
copied = append(copied, c)
}
}
f.columns = copied
// update column index
for i, col := range f.columns {
f.headerTo[col.Header()] = i
}
return nil
}
func (f *frame) DeleteColumn(header string) bool {
f.mu.Lock()
defer f.mu.Unlock()
idx, ok := f.headerTo[header]
if !ok {
return false
}
if idx == 0 && len(f.headerTo) == 1 {
f.headerTo = make(map[string]int)
f.columns = []Column{}
return true
}
copy(f.columns[idx:], f.columns[idx+1:])
f.columns = f.columns[:len(f.columns)-1 : len(f.columns)-1]
// update headerTo
f.headerTo = make(map[string]int)
for i, c := range f.columns {
f.headerTo[c.Header()] = i
}
return true
}
func (f *frame) Rows() ([]string, [][]string) {
f.mu.Lock()
defer f.mu.Unlock()
headers := make([]string, len(f.headerTo))
for k, v := range f.headerTo {
headers[v] = k
}
var rowN int
for _, col := range f.columns {
n := col.CountRow()
if rowN < n {
rowN = n
}
}
rows := make([][]string, rowN)
colN := len(f.columns)
for rowIdx := 0; rowIdx < rowN; rowIdx++ {
row := make([]string, colN)
for colIdx, col := range f.columns { // rowIdx * colIdx
v, err := col.Value(rowIdx)
var elem string
if err == nil {
elem, _ = v.String()
}
row[colIdx] = elem
}
rows[rowIdx] = row
}
return headers, rows
}
func (f *frame) CSV(fpath string) error {
fi, err := os.OpenFile(fpath, os.O_RDWR|os.O_TRUNC, 0777)
if err != nil {
fi, err = os.Create(fpath)
if err != nil {
return err
}
}
defer fi.Close()
wr := csv.NewWriter(fi)
headers, rows := f.Rows()
if err := wr.Write(headers); err != nil {
return err
}
if err := wr.WriteAll(rows); err != nil {
return err
}
wr.Flush()
return wr.Error()
}
// Sort sorts the data frame.
// TODO: use tree?
func (f *frame) Sort(header string, st SortType, so SortOption) error {
f.mu.Lock()
idx, ok := f.headerTo[header]
if !ok {
f.mu.Unlock()
return fmt.Errorf("%q does not exist", header)
}
f.mu.Unlock()
var lesses []LessFunc
switch st {
case SortType_String:
switch so {
case SortOption_Ascending:
lesses = []LessFunc{StringAscendingFunc(idx)}
case SortOption_Descending:
lesses = []LessFunc{StringDescendingFunc(idx)}
}
case SortType_Number:
switch so {
case SortOption_Ascending:
lesses = []LessFunc{NumberAscendingFunc(idx)}
case SortOption_Descending:
lesses = []LessFunc{NumberDescendingFunc(idx)}
}
case SortType_Duration:
switch so {
case SortOption_Ascending:
lesses = []LessFunc{DurationAscendingFunc(idx)}
case SortOption_Descending:
lesses = []LessFunc{DurationDescendingFunc(idx)}
}
}
headers, rows := f.Rows()
SortBy(
rows,
lesses...,
).Sort(rows)
nf, err := NewFromRows(headers, rows)
if err != nil {
return err
}
v, ok := nf.(*frame)
if !ok {
return fmt.Errorf("cannot type assert on frame")
}
*f = *v
return nil
}