Copy disabled (too large)
Download .txt
Showing preview only (11,415K chars total). Download the full file to get everything.
Repository: sjwhitworth/golearn
Branch: master
Commit: 74ae077eafb2
Files: 246
Total size: 68.5 MB
Directory structure:
gitextract_hhyq2fyg/
├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE.md
├── README.md
├── base/
│ ├── arff.go
│ ├── arff_test.go
│ ├── attributes.go
│ ├── attributes_test.go
│ ├── bag.go
│ ├── bag_test.go
│ ├── binary.go
│ ├── categorical.go
│ ├── classifier.go
│ ├── conversion.go
│ ├── csv.go
│ ├── csv_test.go
│ ├── data.go
│ ├── dataframe_go.go
│ ├── dense.go
│ ├── dense_test.go
│ ├── domain.go
│ ├── error.go
│ ├── error_test.go
│ ├── filewrapper.go
│ ├── filtered.go
│ ├── filters.go
│ ├── fixed.go
│ ├── float.go
│ ├── group.go
│ ├── lazy_sort_test.go
│ ├── logger.go
│ ├── mat.go
│ ├── mat_test.go
│ ├── serialize.go
│ ├── serialize_attributes.go
│ ├── serialize_instances.go
│ ├── serialize_test.go
│ ├── sort.go
│ ├── sort_test.go
│ ├── spec.go
│ ├── util.go
│ ├── util_attributes.go
│ ├── util_instances.go
│ ├── util_test.go
│ ├── view.go
│ └── view_test.go
├── clustering/
│ ├── cluster_extra_test.go
│ ├── cluster_test.go
│ ├── clustering.go
│ ├── dbscan.csv
│ ├── dbscan.go
│ ├── dbscan_labels.csv
│ ├── dbscan_test.go
│ ├── em.go
│ ├── em_test.go
│ ├── gaussian_mixture.csv
│ ├── gaussian_mixture.py
│ ├── gaussian_mixture_labels.csv
│ ├── gaussian_mixture_single_obs.csv
│ ├── gen_test.py
│ └── synthetic.csv
├── coverage.sh
├── doc/
│ ├── zh_CN/
│ │ ├── AddingAttributes.md
│ │ ├── AttributeSpecifications.md
│ │ ├── CSVFiles.md
│ │ ├── Classification/
│ │ │ ├── KNN.md
│ │ │ ├── Regression.md
│ │ │ ├── Trees.md
│ │ │ └── liblinear.md
│ │ ├── Contributing.md
│ │ ├── CustomDataGrids.md
│ │ ├── Filtering.md
│ │ ├── FloatAttributePrecision.md
│ │ ├── Home.md
│ │ ├── Installation.md
│ │ └── Instances.md
│ └── zh_TW/
│ ├── AddingAttributes.md
│ ├── AttributeSpecifications.md
│ ├── CSVFiles.md
│ ├── Classification/
│ │ ├── KNN.md
│ │ ├── Regression.md
│ │ ├── Trees.md
│ │ └── liblinear.md
│ ├── Contributing.md
│ ├── CustomDataGrids.md
│ ├── Filtering.md
│ ├── FloatAttributePrecision.md
│ ├── Home.md
│ ├── Installation.md
│ └── Instances.md
├── ensemble/
│ ├── ensemble.go
│ ├── multisvc.go
│ ├── multisvc_test.go
│ ├── randomforest.go
│ └── randomforest_test.go
├── evaluation/
│ ├── confusion.go
│ ├── confusion_test.go
│ ├── cross_fold.go
│ └── cross_fold_test.go
├── examples/
│ ├── averageperceptron/
│ │ └── averageperceptionexample.go
│ ├── crossfold/
│ │ └── rf.go
│ ├── datasets/
│ │ ├── articles.csv
│ │ ├── boston_house_prices.csv
│ │ ├── c45-numeric.csv
│ │ ├── chim.csv
│ │ ├── exam.csv
│ │ ├── exams.csv
│ │ ├── gaussian_outliers.csv
│ │ ├── house-votes-84.csv
│ │ ├── iris.arff
│ │ ├── iris.csv
│ │ ├── iris_binned.csv
│ │ ├── iris_headers.csv
│ │ ├── iris_headers_subset.csv
│ │ ├── iris_sorted_asc.csv
│ │ ├── iris_sorted_desc.csv
│ │ ├── mnist_test.csv
│ │ ├── mnist_train.csv
│ │ ├── randomdata.csv
│ │ ├── sources.txt
│ │ ├── tennis.csv
│ │ ├── titanic.csv
│ │ └── weather.arff
│ ├── instances/
│ │ └── instances.go
│ ├── knnclassifier/
│ │ └── knnclassifier_iris.go
│ ├── serialization/
│ │ └── attributes.go
│ └── trees/
│ ├── cart/
│ │ └── cart.go
│ ├── id3/
│ │ └── trees.go
│ └── isolationForest/
│ └── isolation_forest.go
├── filters/
│ ├── binary.go
│ ├── binary_test.csv
│ ├── binary_test.go
│ ├── binning.go
│ ├── binning_test.go
│ ├── chimerge.go
│ ├── chimerge_freq.go
│ ├── chimerge_funcs.go
│ ├── chimerge_test.go
│ ├── disc.go
│ ├── float.go
│ └── float_test.go
├── go.mod
├── go.sum
├── golearn.go
├── kdtree/
│ ├── heap.go
│ ├── heap_test.go
│ ├── kdtree.go
│ └── kdtree_test.go
├── knn/
│ ├── euclidean.c
│ ├── knn.go
│ ├── knn.h
│ ├── knn_bench_test.go
│ ├── knn_cov_test.go
│ ├── knn_kdtree_test.go
│ ├── knn_opt_euclidean.go
│ ├── knn_test.go
│ ├── knn_test_1.csv
│ ├── knn_test_2.csv
│ ├── knn_test_2_subset.csv
│ ├── knn_train_1.csv
│ ├── knn_train_2.csv
│ ├── knn_weighted_test.go
│ └── temp.cls
├── linear_models/
│ ├── blas.h
│ ├── blasp.h
│ ├── cfuncs.go
│ ├── daxpy.c
│ ├── ddot.c
│ ├── dnrm2.c
│ ├── doc.go
│ ├── dscal.c
│ ├── liblinear.go
│ ├── liblinear_print.go
│ ├── liblinear_print_11.go
│ ├── linear.cpp
│ ├── linear.h
│ ├── linear_models_test.go
│ ├── linear_regression.go
│ ├── linear_regression_test.go
│ ├── linearsvc.go
│ ├── linearsvc_test.go
│ ├── logistic.go
│ ├── logistic_test.go
│ ├── test.csv
│ ├── tmp
│ ├── train.csv
│ ├── tron.cpp
│ ├── tron.h
│ └── util.go
├── meta/
│ ├── bagging.go
│ ├── bagging_test.go
│ ├── meta.go
│ ├── one_v_all.go
│ └── one_v_all_test.go
├── metrics/
│ └── pairwise/
│ ├── chebyshev.go
│ ├── chebyshev_test.go
│ ├── cosine.go
│ ├── cosine_test.go
│ ├── cranberra.go
│ ├── cranberra_test.go
│ ├── euclidean.go
│ ├── euclidean_test.go
│ ├── manhattan.go
│ ├── manhattan_test.go
│ ├── pairwise.go
│ ├── poly_kernel.go
│ ├── poly_kernel_test.go
│ ├── rbf_kernel.go
│ └── rbf_kernel_test.go
├── naive/
│ ├── bernoulli_nb.go
│ ├── bernoulli_nb_test.go
│ ├── naive.go
│ └── test/
│ ├── simple_test.csv
│ └── simple_train.csv
├── neural/
│ ├── funcs.go
│ ├── layered.go
│ ├── layered_test.go
│ ├── network.go
│ ├── network_test.go
│ ├── neural.go
│ └── xor.csv
├── optimisation/
│ └── optimisation.go
├── pca/
│ ├── pca.go
│ └── pca_test.go
├── perceptron/
│ ├── average.go
│ └── average_test.go
├── trees/
│ ├── benchdata.csv
│ ├── cart_classifier.go
│ ├── cart_regressor.go
│ ├── cart_test.go
│ ├── cart_utils.go
│ ├── entropy.go
│ ├── gini.go
│ ├── gr.go
│ ├── id3.go
│ ├── id3_test.go
│ ├── isolation.go
│ ├── isolation_test.go
│ ├── onerow.csv
│ ├── random.go
│ ├── sorter.go
│ ├── tree_bench_test.go
│ ├── tree_test.go
│ └── trees.go
└── utilities/
└── utilities.go
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Mac
.DS_Store
# Intellij
.idea/
*.iml
*.iws
# C/C++
*.o
*.a
*.so
*.dll
# go test coverprofiles
*.coverprofile
#vim
*.sw*
================================================
FILE: .travis.yml
================================================
language: go
go:
- 1.13.x
- 1.14.x
env:
# Temporary workaround for Go 1.6+
- GODEBUG=cgocheck=0
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq libatlas-base-dev
- cd /tmp && wget http://www.csie.ntu.edu.tw/~cjlin/liblinear/oldfiles/liblinear-1.94.tar.gz && tar xf liblinear-1.94.tar.gz && cd liblinear-1.94 && make lib && sudo install -vm644 linear.h /usr/include && sudo install -vm755 liblinear.so.1 /usr/lib && sudo ln -sfv liblinear.so.1 /usr/lib/liblinear.so
- cd $TRAVIS_BUILD_DIR
install:
- go get github.com/smartystreets/goconvey/convey
- go get -v ./...
script:
- ./coverage.sh
after_success:
- bash <(curl -s https://codecov.io/bash)
================================================
FILE: Dockerfile
================================================
FROM alpine
RUN apk update && apk add make gcc linux-headers git perl musl-dev go
RUN git clone https://github.com/xianyi/OpenBLAS && cd OpenBLAS && make && make PREFIX=/usr install
RUN mkdir -p /go/src /go/bin /go/pkg
ENV GOPATH=/go
RUN go get github.com/gonum/blas github.com/sjwhitworth/golearn
================================================
FILE: LICENSE.md
================================================
The MIT License (MIT)
Copyright (c) {{{year}}} {{{fullname}}}
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
GoLearn
=======
<img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125><br>
[GoDoc](https://godoc.org/github.com/sjwhitworth/golearn)
[Build Status](https://travis-ci.org/sjwhitworth/golearn)<br>
[Code Coverage](https://codecov.io/gh/sjwhitworth/golearn)
[Support via Gittip](https://www.gittip.com/sjwhitworth/)
GoLearn is a 'batteries included' machine learning library for Go. **Simplicity**, paired with customisability, is the goal.
We are in active development, and would love comments from users out in the wild. Drop us a line on Twitter.
twitter: [@golearn_ml](http://www.twitter.com/golearn_ml)
Install
=======
See [here](https://github.com/sjwhitworth/golearn/wiki/Installation) for installation instructions.
Getting Started
=======
Data are loaded in as Instances. You can then perform matrix-like operations on them, and pass them to estimators.
GoLearn implements the scikit-learn interface of Fit/Predict, so you can easily swap out estimators for trial and error.
GoLearn also includes helper functions for data, like cross validation, and train and test splitting.
```go
package main
import (
"fmt"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/evaluation"
"github.com/sjwhitworth/golearn/knn"
)
func main() {
// Load in a dataset, with headers. Header attributes will be stored.
// Think of instances as a Data Frame structure in R or Pandas.
// You can also create instances from scratch.
rawData, err := base.ParseCSVToInstances("datasets/iris.csv", true)
if err != nil {
panic(err)
}
// Print a pleasant summary of your data.
fmt.Println(rawData)
//Initialises a new KNN classifier
cls := knn.NewKnnClassifier("euclidean", "linear", 2)
//Do a training-test split
trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
cls.Fit(trainData)
//Calculates the Euclidean distance and returns the most popular label
predictions, err := cls.Predict(testData)
if err != nil {
panic(err)
}
// Prints precision/recall metrics
confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
if err != nil {
panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
}
fmt.Println(evaluation.GetSummary(confusionMat))
}
```
```
Iris-virginica 28 2 56 0.9333 0.9333 0.9333
Iris-setosa 29 0 59 1.0000 1.0000 1.0000
Iris-versicolor 27 2 57 0.9310 0.9310 0.9310
Overall accuracy: 0.9545
```
Examples
========
GoLearn comes with practical examples. Dive in and see what is going on.
```bash
cd $GOPATH/src/github.com/sjwhitworth/golearn/examples/knnclassifier
go run knnclassifier_iris.go
```
```bash
cd $GOPATH/src/github.com/sjwhitworth/golearn/examples/instances
go run instances.go
```
```bash
cd $GOPATH/src/github.com/sjwhitworth/golearn/examples/trees/id3
go run trees.go
```
Docs
====
* [English](https://github.com/sjwhitworth/golearn/wiki)
* [中文文档(简体)](doc/zh_CN/Home.md)
* [中文文档(繁体)](doc/zh_TW/Home.md)
Join the team
=============
Please send me a mail at stephenjameswhitworth@gmail.com
================================================
FILE: base/arff.go
================================================
package base
import (
"bufio"
"bytes"
"encoding/csv"
"fmt"
"io"
"os"
"runtime"
"strings"
)
// SerializeInstancesToDenseARFF writes inst as a densely-formatted ARFF
// file at path, using relation as the @relation name. The header order is
// every non-class Attribute followed by every class Attribute; the actual
// writing is delegated to SerializeInstancesToDenseARFFWithAttributes.
func SerializeInstancesToDenseARFF(inst FixedDataGrid, path, relation string) error {
	// Non-class Attributes lead, class Attributes trail.
	ordered := append(NonClassAttributes(inst), inst.AllClassAttributes()...)
	return SerializeInstancesToDenseARFFWithAttributes(inst, ordered, path, relation)
}
// SerializeInstancesToDenseARFFWithAttributes writes the given FixedDataGrid to a
// densely-formatted ARFF file at path, with the header Attributes in the order
// given by rawAttrs and relation as the @relation name.
//
// The file is created if it does not exist and truncated if it does, so
// overwriting a longer file never leaves stale bytes after the new content.
// Errors from opening, writing or closing the file are returned; a write
// error takes precedence over a close error.
func SerializeInstancesToDenseARFFWithAttributes(inst FixedDataGrid, rawAttrs []Attribute, path, relation string) (err error) {
	// Open (or create) the output file, discarding any previous content.
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean buffered data never reached the disk,
		// so surface it unless an earlier error is already being returned.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	return SerializeInstancesToWriterDenseARFFWithAttributes(f, inst, rawAttrs, relation)
}
// SerializeInstancesToWriterDenseARFFWithAttributes writes inst to w as a
// densely-formatted ARFF document.
//
// The output consists of an "@relation" line naming relation, one
// "@attribute" line per entry of rawAttrs (in that order), and an "@data"
// section with one comma-separated line per row. CategoricalAttributes are
// declared as "{v1, v2, ...}"; every other Attribute is declared as "real".
//
// The first error reported by w, or by the row iteration, is returned.
func SerializeInstancesToWriterDenseARFFWithAttributes(w io.Writer, inst FixedDataGrid, rawAttrs []Attribute, relation string) error {
	// Write @relation header
	if _, err := fmt.Fprintf(w, "@relation %s\n\n", relation); err != nil {
		return err
	}

	// Get all Attribute specifications
	attrs := ResolveAttributes(inst, rawAttrs)

	// Write Attribute information
	for _, s := range attrs {
		attr := s.attr
		t := "real"
		if a, ok := attr.(*CategoricalAttribute); ok {
			t = fmt.Sprintf("{%s}", strings.Join(a.GetValues(), ", "))
		}
		if _, err := fmt.Fprintf(w, "@attribute %s %s\n", attr.GetName(), t); err != nil {
			return err
		}
	}
	if _, err := fmt.Fprint(w, "\n@data\n"); err != nil {
		return err
	}

	// buf is reused for every row to avoid a per-row allocation.
	buf := make([]string, len(attrs))
	var writeErr error
	mapErr := inst.MapOverRows(attrs, func(val [][]byte, row int) (bool, error) {
		for i, v := range val {
			buf[i] = attrs[i].attr.GetStringFromSysVal(v)
		}
		if _, writeErr = fmt.Fprintln(w, strings.Join(buf, ",")); writeErr != nil {
			// Stop iterating: the writer is broken.
			return false, writeErr
		}
		return true, nil
	})
	// writeErr is tracked separately so a write failure is reported even if
	// the row iterator does not pass the callback's error through.
	if writeErr != nil {
		return writeErr
	}
	return mapErr
}
// ParseARFFGetRows returns the number of data rows in the ARFF file at
// filepath, i.e. the number of non-empty lines after the "@data" directive
// that start with neither '@' nor '%'.
//
// The "@data" directive is matched case-insensitively with surrounding
// whitespace ignored, the same test ParseDenseARFFBuildInstancesFromReader
// applies, so both functions agree on where the data section starts.
//
// An error is returned if the file cannot be opened or cannot be scanned to
// the end (for example because a line exceeds bufio.Scanner's token limit).
func ParseARFFGetRows(filepath string) (int, error) {
	f, err := os.Open(filepath)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	inData := false
	count := 0
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 {
			continue
		}
		if !inData {
			// Still in the header: look for the start of the data section.
			if strings.ToLower(strings.TrimSpace(line)) == "@data" {
				inData = true
			}
			continue
		}
		// Directives and comments inside the data section are not rows.
		if line[0] == '@' || line[0] == '%' {
			continue
		}
		count++
	}
	// Without this check a read failure would be reported as a short count.
	if err := scanner.Err(); err != nil {
		return 0, err
	}
	return count, nil
}
// ParseARFFGetAttributes returns the Attributes declared by the "@attribute"
// lines of the ARFF file at filepath, in file order.
//
// Numeric declarations ("real", "numeric" and "integer", matched
// case-insensitively) become FloatAttributes. Nominal declarations of the
// form "{a, b, c}" become CategoricalAttributes whose values are registered
// in the order listed; values are split on commas regardless of the spacing
// around them, and empty entries are ignored. Finally every FloatAttribute
// is given the precision ParseCSVEstimateFilePrecision reports for the file.
//
// This function has no error return: it panics with an error value if the
// file cannot be read, or if a declaration is malformed or of an
// unsupported type.
func ParseARFFGetAttributes(filepath string) []Attribute {
	var ret []Attribute

	f, err := os.Open(filepath)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 || line[0] != '@' {
			continue
		}
		// Expected shape: @attribute <name> <type...>
		fields := strings.Fields(line)
		if len(fields) < 3 || strings.ToLower(fields[0]) != "@attribute" {
			continue
		}

		var attr Attribute
		switch strings.ToLower(fields[2]) {
		case "real", "numeric", "integer":
			attr = new(FloatAttribute)
		default:
			if fields[2][0] != '{' {
				panic(fmt.Errorf("Unsupported Attribute type %s on line '%s'", fields[2], line))
			}
			if !strings.HasSuffix(fields[len(fields)-1], "}") {
				panic(fmt.Errorf("Missing categorical bracket on line '%s'", line))
			}
			// The value list may have been split on whitespace by Fields;
			// re-join it, drop the surrounding braces and split on commas
			// so that "{a,b, c}" and "{a, b, c}" parse identically.
			spec := strings.Join(fields[2:], " ")
			spec = spec[1 : len(spec)-1]

			cat := NewCategoricalAttribute()
			registered := 0
			for _, v := range strings.Split(spec, ",") {
				v = strings.TrimSpace(v)
				if v == "" {
					continue
				}
				// Called for its side effect of registering the value.
				cat.GetSysValFromString(v)
				registered++
			}
			if registered == 0 {
				panic(fmt.Errorf("Empty categorical field on line '%s'", line))
			}
			attr = cat
		}

		attr.SetName(fields[1])
		ret = append(ret, attr)
	}
	// A scan failure would otherwise silently truncate the Attribute list.
	if err := scanner.Err(); err != nil {
		panic(err)
	}

	maxPrecision, err := ParseCSVEstimateFilePrecision(filepath)
	if err != nil {
		panic(err)
	}
	for _, a := range ret {
		if fa, ok := a.(*FloatAttribute); ok {
			fa.Precision = maxPrecision
		}
	}
	return ret
}
// ParseDenseARFFBuildInstancesFromReader updates an [[#UpdatableDataGrid]] from a io.Reader.
//
// Lines starting with '%' are skipped. Everything up to and including the
// "@data" directive (matched case-insensitively, surrounding whitespace
// ignored) is treated as header and ignored. Each following line is parsed
// as CSV and stored in u, one row per record, with column i going to
// attrs[i]. Values are whitespace-trimmed before conversion.
//
// An error is returned when:
//   - a line is not valid CSV, or the reader fails;
//   - a record has more values than there are Attributes;
//   - a value for a CategoricalAttribute is not one of its known values;
//   - converting or storing a value panics with a non-runtime error.
//
// Runtime errors (nil dereference, index out of range, ...) are programming
// errors and are re-panicked unchanged.
func ParseDenseARFFBuildInstancesFromReader(r io.Reader, attrs []Attribute, u UpdatableDataGrid) (err error) {
	var rowCounter int

	defer func() {
		if rec := recover(); rec != nil {
			if _, ok := rec.(runtime.Error); ok {
				// Re-raise the original value, not the (still nil) result.
				panic(rec)
			}
			// rec is passed directly rather than asserted to error, so a
			// non-error panic value cannot trigger a second panic here.
			err = fmt.Errorf("Error at line %d (error %s)", rowCounter, rec)
		}
	}()

	scanner := bufio.NewScanner(r)
	reading := false
	specs := ResolveAttributes(u, attrs)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "%") {
			continue
		}
		if !reading {
			if strings.TrimSpace(strings.ToLower(line)) == "@data" {
				reading = true
			}
			continue
		}

		reader := csv.NewReader(bytes.NewBufferString(line))
		for {
			record, readErr := reader.Read()
			if readErr == io.EOF {
				break
			}
			if readErr != nil {
				return readErr
			}
			if len(record) > len(specs) {
				// Report this instead of indexing past the end of specs.
				panic(fmt.Errorf("Too many values on line '%s'", line))
			}
			for i, v := range record {
				v = strings.TrimSpace(v)
				if a, ok := specs[i].attr.(*CategoricalAttribute); ok {
					// Reject values the header did not declare.
					if val := a.GetSysVal(v); val == nil {
						panic(fmt.Errorf("Unexpected class on line '%s'", line))
					}
				}
				u.Set(specs[i], rowCounter, specs[i].attr.GetSysValFromString(v))
			}
			rowCounter++
		}
	}
	// Surface read failures instead of reporting a short but "successful" parse.
	return scanner.Err()
}
// ParseDenseARFFToInstances parses the dense ARFF file at filepath into a
// new DenseInstances.
//
// The file is read in three passes: ParseARFFGetRows counts the data rows,
// ParseARFFGetAttributes reads the header, and
// ParseDenseARFFBuildInstancesFromReader fills in the values. The last
// Attribute declared in the header is registered as the class Attribute.
//
// On failure the returned DenseInstances is nil. Non-runtime panics raised
// while parsing are converted into the returned error; runtime errors are
// re-panicked unchanged.
func ParseDenseARFFToInstances(filepath string) (ret *DenseInstances, err error) {
	defer func() {
		if r := recover(); r != nil {
			if _, ok := r.(runtime.Error); ok {
				panic(r)
			}
			ret = nil
			if e, ok := r.(error); ok {
				err = e
			} else {
				// Panic values are not guaranteed to be errors.
				err = fmt.Errorf("%v", r)
			}
		}
	}()

	// Find the number of rows in the file
	rows, err := ParseARFFGetRows(filepath)
	if err != nil {
		return nil, err
	}

	// Get the Attributes we want
	attrs := ParseARFFGetAttributes(filepath)
	if len(attrs) == 0 {
		// Without this guard the class Attribute lookup below would index
		// an empty slice and crash the caller.
		return nil, fmt.Errorf("No Attributes found in ARFF file '%s'", filepath)
	}

	// Allocate return value
	ret = NewDenseInstances()

	// Add all the Attributes
	for _, a := range attrs {
		ret.AddAttribute(a)
	}

	// Set the last Attribute as the class
	ret.AddClassAttribute(attrs[len(attrs)-1])
	ret.Extend(rows)

	f, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Read the data; the reader skips the header itself.
	err = ParseDenseARFFBuildInstancesFromReader(f, attrs, ret)
	if err != nil {
		ret = nil
	}
	return ret, err
}
================================================
FILE: base/arff_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"io/ioutil"
"testing"
)
// TestParseARFFGetRows checks that ParseARFFGetRows reports 150 data rows
// for the bundled iris.arff fixture and returns no error.
func TestParseARFFGetRows(t *testing.T) {
Convey("Getting the number of rows for a ARFF file", t, func() {
Convey("With a valid file path", func() {
// Expected number of rows in the @data section of the fixture.
numNonHeaderRows := 150
lineCount, err := ParseARFFGetRows("../examples/datasets/iris.arff")
So(err, ShouldBeNil)
So(lineCount, ShouldEqual, numNonHeaderRows)
})
})
}
// TestParseARFFGetAttributes checks the Attributes parsed from the header of
// the iris.arff fixture: the first four must be FloatAttributes and the
// fifth a CategoricalAttribute holding the three iris species in order.
// NOTE(review): the outer Convey description says "CSV file" although the
// fixture is an ARFF file.
func TestParseARFFGetAttributes(t *testing.T) {
Convey("Getting the attributes in the headers of a CSV file", t, func() {
attributes := ParseARFFGetAttributes("../examples/datasets/iris.arff")
sepalLengthAttribute := attributes[0]
sepalWidthAttribute := attributes[1]
petalLengthAttribute := attributes[2]
petalWidthAttribute := attributes[3]
speciesAttribute := attributes[4]
Convey("It gets the correct types for the headers based on the column values", func() {
_, ok1 := sepalLengthAttribute.(*FloatAttribute)
_, ok2 := sepalWidthAttribute.(*FloatAttribute)
_, ok3 := petalLengthAttribute.(*FloatAttribute)
_, ok4 := petalWidthAttribute.(*FloatAttribute)
sA, ok5 := speciesAttribute.(*CategoricalAttribute)
So(ok1, ShouldBeTrue)
So(ok2, ShouldBeTrue)
So(ok3, ShouldBeTrue)
So(ok4, ShouldBeTrue)
So(ok5, ShouldBeTrue)
// Category order must match the order of declaration in the header.
So(sA.GetValues(), ShouldResemble, []string{"Iris-setosa", "Iris-versicolor", "Iris-virginica"})
})
})
}
// TestParseARFF1 loads the iris.arff fixture end to end and spot-checks the
// string form of rows 0, 50 and 100.
func TestParseARFF1(t *testing.T) {
Convey("Should just be able to load in an ARFF...", t, func() {
inst, err := ParseDenseARFFToInstances("../examples/datasets/iris.arff")
So(err, ShouldBeNil)
So(inst, ShouldNotBeNil)
So(inst.RowString(0), ShouldEqual, "5.1 3.5 1.4 0.2 Iris-setosa")
So(inst.RowString(50), ShouldEqual, "7.0 3.2 4.7 1.4 Iris-versicolor")
So(inst.RowString(100), ShouldEqual, "6.3 3.3 6.0 2.5 Iris-virginica")
})
}
// TestParseARFF2 loads the weather.arff fixture and checks that all five
// Attributes can be looked up by name and that the categorical values of
// "outlook", "windy" and "play" match the expected lists in order.
func TestParseARFF2(t *testing.T) {
Convey("Loading the weather dataset...", t, func() {
inst, err := ParseDenseARFFToInstances("../examples/datasets/weather.arff")
So(err, ShouldBeNil)
Convey("Attributes should be right...", func() {
So(GetAttributeByName(inst, "outlook"), ShouldNotBeNil)
So(GetAttributeByName(inst, "temperature"), ShouldNotBeNil)
So(GetAttributeByName(inst, "humidity"), ShouldNotBeNil)
So(GetAttributeByName(inst, "windy"), ShouldNotBeNil)
So(GetAttributeByName(inst, "play"), ShouldNotBeNil)
Convey("outlook attribute values should match reference...", func() {
outlookAttr := GetAttributeByName(inst, "outlook").(*CategoricalAttribute)
So(outlookAttr.GetValues(), ShouldResemble, []string{"sunny", "overcast", "rainy"})
})
Convey("windy values should match reference...", func() {
windyAttr := GetAttributeByName(inst, "windy").(*CategoricalAttribute)
So(windyAttr.GetValues(), ShouldResemble, []string{"TRUE", "FALSE"})
})
Convey("play values should match reference...", func() {
playAttr := GetAttributeByName(inst, "play").(*CategoricalAttribute)
So(playAttr.GetValues(), ShouldResemble, []string{"yes", "no"})
})
})
})
}
// TestSerializeToARFF round-trips the weather.arff fixture: it parses the
// file, serialises it to a temporary file, and then checks both that
// re-parsing yields equal instances and that the written bytes equal the
// original file.
// NOTE(review): the temporary file is neither closed nor removed here.
func TestSerializeToARFF(t *testing.T) {
Convey("Loading the weather dataset...", t, func() {
inst, err := ParseDenseARFFToInstances("../examples/datasets/weather.arff")
So(err, ShouldBeNil)
Convey("Saving back should suceed...", func() {
// Use the header order of the original file so the output can be
// compared byte for byte below.
attrs := ParseARFFGetAttributes("../examples/datasets/weather.arff")
f, err := ioutil.TempFile("", "inst")
So(err, ShouldBeNil)
err = SerializeInstancesToDenseARFFWithAttributes(inst, attrs, f.Name(), "weather")
So(err, ShouldBeNil)
Convey("Reading the file back should be lossless...", func() {
inst2, err := ParseDenseARFFToInstances(f.Name())
So(err, ShouldBeNil)
So(InstancesAreEqual(inst, inst2), ShouldBeTrue)
})
Convey("The file should be exactly the same as the original...", func() {
ref, err := ioutil.ReadFile("../examples/datasets/weather.arff")
So(err, ShouldBeNil)
gen, err := ioutil.ReadFile(f.Name())
So(err, ShouldBeNil)
So(string(gen), ShouldEqual, string(ref))
})
})
})
}
================================================
FILE: base/attributes.go
================================================
package base
import (
"encoding/json"
)
// Attribute type identifiers, numbered from 0 in declaration order.
const (
// CategoricalType is for Attributes which represent values distinctly.
CategoricalType = iota
// Float64Type should be replaced with a FractionalNumeric type [DEPRECATED].
Float64Type
// BinaryType is the identifier returned by BinaryAttribute.GetType.
BinaryType
)
// Attribute disambiguates a column of the feature matrix and declares its
// type. Every Attribute can also be marshalled to and unmarshalled from JSON.
type Attribute interface {
json.Unmarshaler
json.Marshaler
// Returns the general characteristics of this Attribute,
// to avoid the overhead of casting.
GetType() int
// Returns the human-readable name of this Attribute.
GetName() string
// Sets the human-readable name of this Attribute.
SetName(string)
// Gets a human-readable overview of this Attribute for debugging.
String() string
// Converts a value given from a human-readable string into a system
// representation. For example, a CategoricalAttribute with values
// ["iris-setosa", "iris-virginica"] would return the system
// representation of 0 when given "iris-setosa".
GetSysValFromString(string) []byte
// Converts a given value from a system representation into a human
// representation. For example, a CategoricalAttribute with values
// ["iris-setosa", "iris-virginica"] might return "iris-setosa"
// when given the system representation of 0 as the argument.
GetStringFromSysVal([]byte) string
// Tests for equality with another Attribute. Other Attributes are
// considered equal if:
// * They have the same type (i.e. FloatAttribute <> CategoricalAttribute)
// * They have the same name
// * If applicable, they have the same categorical values (though not
// necessarily in the same order).
Equals(Attribute) bool
// Tests whether two Attributes can be represented in the same pond
// i.e. they're the same size, and their byte order makes them meaningful
// when considered together
Compatible(Attribute) bool
}
================================================
FILE: base/attributes_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestFloatAttributeSysVal checks that a FloatAttribute round-trips the
// string "1.21" through its system representation unchanged.
func TestFloatAttributeSysVal(t *testing.T) {
Convey("Given some float", t, func() {
x := "1.21"
attr := NewFloatAttribute("")
Convey("When the float gets packed", func() {
packed := attr.GetSysValFromString(x)
Convey("And then unpacked", func() {
unpacked := attr.GetStringFromSysVal(packed)
Convey("The unpacked version should be the same", func() {
So(unpacked, ShouldEqual, x)
})
})
})
})
}
// TestCategoricalAttributeVal checks that a CategoricalAttribute round-trips
// strings through its system representation, and that the second distinct
// string it sees is encoded with a first byte of 0x1.
func TestCategoricalAttributeVal(t *testing.T) {
// attr is shared by both top-level Convey blocks, so the second block sees
// the value registered by the first.
attr := NewCategoricalAttribute()
Convey("Given some string", t, func() {
x := "hello world!"
Convey("When the string gets converted", func() {
packed := attr.GetSysValFromString(x)
Convey("And then unconverted", func() {
unpacked := attr.GetStringFromSysVal(packed)
Convey("The unpacked version should be the same", func() {
So(unpacked, ShouldEqual, x)
})
})
})
})
Convey("Given some second string", t, func() {
x := "hello world 1!"
Convey("When the string gets converted", func() {
packed := attr.GetSysValFromString(x)
So(packed[0], ShouldEqual, 0x1)
Convey("And then unconverted", func() {
unpacked := attr.GetStringFromSysVal(packed)
Convey("The unpacked version should be the same", func() {
So(unpacked, ShouldEqual, x)
})
})
})
})
}
// TestBinaryAttribute checks BinaryAttribute's name accessors and that the
// strings "1" and "0" convert to the bytes 1 and 0 respectively.
func TestBinaryAttribute(t *testing.T) {
attr := new(BinaryAttribute)
Convey("Given some binary Attribute", t, func() {
Convey("SetName, GetName should be equal", func() {
attr.SetName("Hello")
So(attr.GetName(), ShouldEqual, "Hello")
})
// Only the input "1" is exercised here.
Convey("Non-zero values should equal 1", func() {
sysVal := attr.GetSysValFromString("1")
So(sysVal[0], ShouldEqual, 1)
})
Convey("Zero values should equal 0", func() {
sysVal := attr.GetSysValFromString("0")
So(sysVal[0], ShouldEqual, 0)
})
})
}
================================================
FILE: base/bag.go
================================================
package base
import (
"bytes"
"fmt"
)
// BinaryAttributeGroup holds BinaryAttributes only and packs each
// Attribute's value into a single bit for compact storage.
type BinaryAttributeGroup struct {
	parent     DataGrid
	attributes []Attribute
	size       int
	alloc      []byte
	maxRow     int
}

// String gives a short human-readable name for this group.
func (b *BinaryAttributeGroup) String() string {
	return "BinaryAttributeGroup"
}

// RowSizeInBytes reports how many bytes one row occupies: one bit per
// Attribute, rounded up to a whole byte.
func (b *BinaryAttributeGroup) RowSizeInBytes() int {
	const bitsPerByte = 8
	return (len(b.attributes) + bitsPerByte - 1) / bitsPerByte
}

// Attributes returns a copy of the slice of Attributes in this group, so
// callers cannot alter the group's own slice.
func (b *BinaryAttributeGroup) Attributes() []Attribute {
	out := make([]Attribute, len(b.attributes))
	copy(out, b.attributes)
	return out
}

// AddAttribute appends a to the Attributes of this group. It always
// returns nil.
func (b *BinaryAttributeGroup) AddAttribute(a Attribute) error {
	b.attributes = append(b.attributes, a)
	return nil
}

// Storage exposes the underlying byte storage without copying it.
//
// IMPORTANT: don't modify
func (b *BinaryAttributeGroup) Storage() []byte {
	return b.alloc
}

//
// internal methods
//

// setStorage replaces the underlying byte storage with a.
func (b *BinaryAttributeGroup) setStorage(a []byte) {
	b.alloc = a
}

// getByteOffset returns the index into the storage of the byte that holds
// the bit for column col of row row.
func (b *BinaryAttributeGroup) getByteOffset(col, row int) int {
	rowStart := row * b.RowSizeInBytes()
	return rowStart + col/8
}

// set stores val at (col, row): the bit is set when val[0] is non-zero and
// cleared otherwise. maxRow is raised to row+1 if that is larger.
func (b *BinaryAttributeGroup) set(col, row int, val []byte) {
	idx := b.getByteOffset(col, row)
	mask := byte(1) << (uint(col) % 8)
	if val[0] > 0 {
		b.alloc[idx] |= mask
	} else {
		b.alloc[idx] &^= mask
	}
	if next := row + 1; next > b.maxRow {
		b.maxRow = next
	}
}

// get returns a one-byte slice holding 1 if the bit at (col, row) is set
// and 0 otherwise.
func (b *BinaryAttributeGroup) get(col, row int) []byte {
	idx := b.getByteOffset(col, row)
	mask := byte(1) << uint(col%8)
	if b.alloc[idx]&mask != 0 {
		return []byte{1}
	}
	return []byte{0}
}

// appendToRowBuf writes the string form of every value in row to buffer,
// separating consecutive values with a single space.
func (b *BinaryAttributeGroup) appendToRowBuf(row int, buffer *bytes.Buffer) {
	last := len(b.attributes) - 1
	for col, attr := range b.attributes {
		sep := " "
		if col == last {
			sep = ""
		}
		fmt.Fprintf(buffer, "%s%s", attr.GetStringFromSysVal(b.get(col, row)), sep)
	}
}

// resize grows the storage by add bytes, keeping the existing content and
// zero-filling the new tail.
func (b *BinaryAttributeGroup) resize(add int) {
	grown := make([]byte, len(b.alloc)+add)
	copy(grown, b.alloc)
	b.alloc = grown
}
================================================
FILE: base/bag_test.go
================================================
package base
import (
"fmt"
. "github.com/smartystreets/goconvey/convey"
"math/rand"
"testing"
)
// TestBAGSimple stores a fixed 3x3 identity bit pattern in a DenseInstances
// made of three BinaryAttributes and checks that every value read back is
// one byte long and equal to what was written.
func TestBAGSimple(t *testing.T) {
Convey("Given certain bit data", t, func() {
// Generate said bits
bVals := [][]byte{
[]byte{1, 0, 0},
[]byte{0, 1, 0},
[]byte{0, 0, 1},
}
// Create a new DenseInstances
inst := NewDenseInstances()
for i := 0; i < 3; i++ {
inst.AddAttribute(NewBinaryAttribute(fmt.Sprintf("%d", i)))
}
// Get and re-order the attributes
// (the specs are placed by Attribute name so that attrSpecs[i] is the
// Attribute named i, whatever order ResolveAllAttributes returned)
attrSpecsUnordered := ResolveAllAttributes(inst)
attrSpecs := make([]AttributeSpec, 3)
for _, a := range attrSpecsUnordered {
name := a.GetAttribute().GetName()
So(name, ShouldBeIn, []string{"0", "1", "2"})
if name == "0" {
attrSpecs[0] = a
} else if name == "1" {
attrSpecs[1] = a
} else if name == "2" {
attrSpecs[2] = a
}
}
inst.Extend(3)
for row, b := range bVals {
for col, c := range b {
inst.Set(attrSpecs[col], row, []byte{c})
}
}
Convey("All the row values should be the right length...", func() {
inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
// The inner i shadows the row index; only column widths matter here.
for i := range attrSpecs {
So(len(row[i]), ShouldEqual, 1)
}
return true, nil
})
})
Convey("All the values should be the same...", func() {
inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
for j := range attrSpecs {
So(row[j][0], ShouldEqual, bVals[i][j])
}
return true, nil
})
})
})
}
// TestBAG is the randomised counterpart of TestBAGSimple: it stores 50 rows
// of three random bits in a DenseInstances made of three BinaryAttributes
// and checks that every value read back is one byte long and equal to what
// was written.
func TestBAG(t *testing.T) {
Convey("Given randomly generated bit data", t, func() {
// Generate said bits
bVals := make([][]byte, 0)
for i := 0; i < 50; i++ {
b := make([]byte, 3)
for j := 0; j < 3; j++ {
// A non-negative normal sample becomes a 1 bit, a negative one a 0 bit.
if rand.NormFloat64() >= 0 {
b[j] = byte(1)
} else {
b[j] = byte(0)
}
}
bVals = append(bVals, b)
}
// Create a new DenseInstances
inst := NewDenseInstances()
for i := 0; i < 3; i++ {
inst.AddAttribute(NewBinaryAttribute(fmt.Sprintf("%d", i)))
}
// Get and re-order the attributes
// (the specs are placed by Attribute name so that attrSpecs[i] is the
// Attribute named i, whatever order ResolveAllAttributes returned)
attrSpecsUnordered := ResolveAllAttributes(inst)
attrSpecs := make([]AttributeSpec, 3)
for _, a := range attrSpecsUnordered {
name := a.GetAttribute().GetName()
So(name, ShouldBeIn, []string{"0", "1", "2"})
if name == "0" {
attrSpecs[0] = a
} else if name == "1" {
attrSpecs[1] = a
} else if name == "2" {
attrSpecs[2] = a
}
}
inst.Extend(50)
for row, b := range bVals {
for col, c := range b {
inst.Set(attrSpecs[col], row, []byte{c})
}
}
Convey("All the row values should be the right length...", func() {
inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
// The inner i shadows the row index; only column widths matter here.
for i := range attrSpecs {
So(len(row[i]), ShouldEqual, 1)
}
return true, nil
})
})
Convey("All the values should be the same...", func() {
inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
for j := range attrSpecs {
So(row[j][0], ShouldEqual, bVals[i][j])
}
return true, nil
})
})
})
}
================================================
FILE: base/binary.go
================================================
package base
import (
"encoding/json"
"fmt"
"strconv"
)
// BinaryAttributes can only represent 1 or 0.
type BinaryAttribute struct {
Name string
}
// MarshalJSON returns a JSON version of this BinaryAttribute for serialisation.
func (b *BinaryAttribute) MarshalJSON() ([]byte, error) {
return json.Marshal(map[string]interface{}{
"type": "binary",
"name": b.Name,
})
}
// UnmarshalJSON unpacks a BinaryAttribute from serialisation.
// Usually, there's nothing to deserialize.
func (b *BinaryAttribute) UnmarshalJSON(data []byte) error {
return nil
}
// NewBinaryAttribute creates a BinaryAttribute with the given name
func NewBinaryAttribute(name string) *BinaryAttribute {
return &BinaryAttribute{
name,
}
}
// GetName returns the name of this Attribute.
func (b *BinaryAttribute) GetName() string {
return b.Name
}
// SetName sets the name of this Attribute.
func (b *BinaryAttribute) SetName(name string) {
b.Name = name
}
// GetType returns BinaryType.
func (b *BinaryAttribute) GetType() int {
return BinaryType
}
// GetSysValFromString parses userVal as a float and returns a single
// byte: 1 when the parsed value is greater than zero, 0 otherwise.
//
// IMPORTANT: panics if userVal cannot be parsed as a float.
func (b *BinaryAttribute) GetSysValFromString(userVal string) []byte {
	parsed, parseErr := strconv.ParseFloat(userVal, 64)
	if parseErr != nil {
		panic(parseErr)
	}
	if parsed > 0 {
		return []byte{1}
	}
	return []byte{0}
}
// GetStringFromSysVal returns "1" when the first byte of val is non-zero
// and "0" otherwise.
//
// A nil or empty val is reported as "0" rather than causing an
// out-of-range panic.
func (b *BinaryAttribute) GetStringFromSysVal(val []byte) string {
	if len(val) > 0 && val[0] > 0 {
		return "1"
	}
	return "0"
}
// Equals reports whether other is also a *BinaryAttribute carrying the
// same name as this one.
func (b *BinaryAttribute) Equals(other Attribute) bool {
	candidate, isBinary := other.(*BinaryAttribute)
	return isBinary && candidate.Name == b.Name
}
// Compatible reports whether other is also a *BinaryAttribute, i.e.
// whether both Attributes can be stored alongside each other.
func (b *BinaryAttribute) Compatible(other Attribute) bool {
	_, isBinary := other.(*BinaryAttribute)
	return isBinary
}
// String returns a human-readable representation of the form
// "BinaryAttribute(<name>)".
func (b *BinaryAttribute) String() string {
	const layout = "BinaryAttribute(%s)"
	return fmt.Sprintf(layout, b.Name)
}
================================================
FILE: base/categorical.go
================================================
package base
import (
"encoding/json"
"fmt"
)
// CategoricalAttribute is an Attribute implementation
// which stores discrete string values
// - useful for representing classes.
type CategoricalAttribute struct {
	// Name is the human-readable name of this Attribute.
	Name string
	// values holds the known categories; a value's system representation
	// is its index in this slice.
	values []string
}
// MarshalJSON serialises this Attribute as a JSON object with its "type"
// (always "categorical"), its "name", and an "attr" object holding the
// known "values".
func (Attr *CategoricalAttribute) MarshalJSON() ([]byte, error) {
	details := map[string]interface{}{
		"values": Attr.values,
	}
	payload := map[string]interface{}{
		"type": "categorical",
		"name": Attr.Name,
		"attr": details,
	}
	return json.Marshal(payload)
}
// UnmarshalJSON restores the category values of this Attribute from JSON.
//
// Two layouts are accepted: an object with a top-level "values" array,
// and the object produced by MarshalJSON, where the array is nested under
// "attr" and a "name" is present (the name is restored in that case).
// Decoded values are appended to any values already stored.
//
// A null or missing "values" entry is treated as "no values". Malformed
// JSON or non-string values yield an error instead of a panic, and leave
// the receiver unmodified.
func (Attr *CategoricalAttribute) UnmarshalJSON(data []byte) error {
	var payload struct {
		Name   *string  `json:"name"`
		Values []string `json:"values"`
		Attr   struct {
			Values []string `json:"values"`
		} `json:"attr"`
	}
	if err := json.Unmarshal(data, &payload); err != nil {
		return err
	}
	if payload.Name != nil {
		Attr.Name = *payload.Name
	}
	Attr.values = append(Attr.values, payload.Values...)
	Attr.values = append(Attr.values, payload.Attr.Values...)
	return nil
}
// NewCategoricalAttribute returns an unnamed CategoricalAttribute with an
// empty (but non-nil) value list.
func NewCategoricalAttribute() *CategoricalAttribute {
	return &CategoricalAttribute{
		Name:   "",
		values: []string{},
	}
}
// GetValues exposes the slice of values currently defined on this
// Attribute (the slice itself, not a copy).
func (Attr *CategoricalAttribute) GetValues() []string {
	known := Attr.values
	return known
}
// GetName reports the human-readable name stored on this attribute.
func (Attr *CategoricalAttribute) GetName() string {
	current := Attr.Name
	return current
}
// SetName replaces the human-readable name stored on this attribute.
func (Attr *CategoricalAttribute) SetName(name string) {
	(*Attr).Name = name
}
// GetType identifies this Attribute as a CategoricalType, so callers
// can branch on the kind without a type assertion.
func (Attr *CategoricalAttribute) GetType() int {
	return int(CategoricalType)
}
// GetSysVal looks userVal up in the known values and returns its index,
// packed with PackU64ToBytes. It returns nil when userVal is unknown and
// never modifies the Attribute.
func (Attr *CategoricalAttribute) GetSysVal(userVal string) []byte {
	for idx := 0; idx < len(Attr.values); idx++ {
		if Attr.values[idx] != userVal {
			continue
		}
		return PackU64ToBytes(uint64(idx))
	}
	return nil
}
// GetUsrVal decodes sysVal with UnpackBytesToU64 and returns the value
// stored at that index.
//
// IMPORTANT: the index is not range-checked, so an out-of-range sysVal
// panics.
func (Attr *CategoricalAttribute) GetUsrVal(sysVal []byte) string {
	return Attr.values[UnpackBytesToU64(sysVal)]
}
// GetSysValFromString returns the system representation of rawVal: its
// index in the value list, packed with PackU64ToBytes.
//
// IMPORTANT: if rawVal is not yet known it is appended to the value list
// and the index of the new entry is returned. To test for membership
// without modifying the Attribute, use GetSysVal and check for a
// zero-length return value.
//
// Example: if the CategoricalAttribute contains the values ["iris-setosa",
// "iris-virginica"] and "iris-versicolor" is provided as the argument,
// the value list becomes ["iris-setosa", "iris-virginica", "iris-versicolor"]
// and index 2 is returned as the system representation.
func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) []byte {
	// Reuse the index of the first existing match, if any
	for idx := range Attr.values {
		if Attr.values[idx] == rawVal {
			return PackU64ToBytes(uint64(idx))
		}
	}
	// Unknown value: register it at the end of the list
	Attr.values = append(Attr.values, rawVal)
	return PackU64ToBytes(uint64(len(Attr.values) - 1))
}
// String summarises this Attribute as
// CategoricalAttribute("<name>", [<values>]), listing every value the
// Attribute can currently take.
func (Attr *CategoricalAttribute) String() string {
	name, known := Attr.Name, Attr.values
	return fmt.Sprintf("CategoricalAttribute(\"%s\", %s)", name, known)
}
// GetStringFromSysVal decodes rawVal with UnpackBytesToU64 and returns
// the value stored at that index.
//
// IMPORTANT: This function calls panic() if the decoded index is not
// smaller than the number of known values.
// TODO: Return a user-configurable default instead.
func (Attr *CategoricalAttribute) GetStringFromSysVal(rawVal []byte) string {
	index := int(UnpackBytesToU64(rawVal))
	if index < len(Attr.values) {
		return Attr.values[index]
	}
	panic(fmt.Sprintf("Out of range: %d in %d (%s)", index, len(Attr.values), Attr))
}
// Equals checks equality against another Attribute.
//
// The result is true only when other is a *CategoricalAttribute with the
// same name and the same values in the same order.
func (Attr *CategoricalAttribute) Equals(other Attribute) bool {
	peer, isCategorical := other.(*CategoricalAttribute)
	switch {
	case !isCategorical:
		// Not the same type, so can't be equal
		return false
	case Attr.GetName() != peer.GetName():
		return false
	case len(peer.values) != len(Attr.values):
		return false
	}
	// Same length: compare the values position by position
	for i := range Attr.values {
		if Attr.values[i] != peer.values[i] {
			return false
		}
	}
	return true
}
// Compatible reports whether other is a *CategoricalAttribute holding
// the same values as this one, in the same order. Names are not compared.
func (Attr *CategoricalAttribute) Compatible(other Attribute) bool {
	peer, isCategorical := other.(*CategoricalAttribute)
	if !isCategorical || len(peer.values) != len(Attr.values) {
		return false
	}
	// Same length: compare the values position by position
	for i := range Attr.values {
		if Attr.values[i] != peer.values[i] {
			return false
		}
	}
	return true
}
================================================
FILE: base/classifier.go
================================================
package base
import (
"gonum.org/v1/gonum/mat"
)
// Classifier implementations predict categorical class labels.
type Classifier interface {
	// Predict takes a set of Instances, copies the class Attribute
	// and constructs a new set of Instances of equivalent
	// length with only the class Attribute and fills it in
	// with predictions.
	Predict(FixedDataGrid) (FixedDataGrid, error)
	// Fit takes a set of instances and updates the Classifier's
	// internal structures to enable prediction.
	Fit(FixedDataGrid) error
	// String returns a human-readable description of the classifier.
	String() string
	// Save writes the classifier to a file.
	Save(string) error
	// Load recreates the classifier from a file.
	Load(string) error
	// GetMetadata retrieves the metadata associated with this classifier
	// (required for Ensembles).
	GetMetadata() ClassifierMetadataV1
	// SaveWithPrefix and LoadWithPrefix are used when something is saved
	// (or loaded) as part of an ensemble.
	SaveWithPrefix(*ClassifierSerializer, string) error
	LoadWithPrefix(*ClassifierDeserializer, string) error
}
// BaseClassifier stores options common to every classifier.
type BaseClassifier struct {
	// TrainingData points at the DataGrid held by the classifier.
	// NOTE(review): presumably the data passed to Fit — confirm with users
	// of this struct.
	TrainingData *DataGrid
}
// BaseRegressor bundles a dense matrix, a name and a slice of float64
// labels.
// NOTE(review): presumably the state shared by regression models (feature
// matrix plus target values) — confirm with users of this struct.
type BaseRegressor struct {
	Data   mat.Dense
	Name   string
	Labels []float64
}
================================================
FILE: base/conversion.go
================================================
package base
import (
"fmt"
"gonum.org/v1/gonum/mat"
)
// checkAllAttributesAreFloat returns an error as soon as it meets an
// Attribute which is not a *FloatAttribute, and nil when every Attribute
// (including none at all) is one.
func checkAllAttributesAreFloat(attrs []Attribute) error {
	for i := 0; i < len(attrs); i++ {
		switch attrs[i].(type) {
		case *FloatAttribute:
			// Acceptable, keep scanning
		default:
			return fmt.Errorf("All []Attributes to this method must be FloatAttributes")
		}
	}
	return nil
}
// ConvertRowToMat64 reads row r of f for the given Attributes and returns
// the values as a 1 x len(attrs) mat.Dense.
//
// An error is returned unless every Attribute is a FloatAttribute.
func ConvertRowToMat64(attrs []Attribute, f FixedDataGrid, r int) (*mat.Dense, error) {
	if err := checkAllAttributesAreFloat(attrs); err != nil {
		return nil, err
	}

	// One row, one column per requested Attribute
	rowVec := mat.NewDense(1, len(attrs), nil)

	// Look each Attribute up in the grid, then copy its value across
	specs := ResolveAttributes(f, attrs)
	for col := range specs {
		raw := f.Get(specs[col], r)
		rowVec.Set(0, col, UnpackBytesToFloat(raw))
	}

	return rowVec, nil
}
// ConvertAllRowsToMat64 converts every row of f into its own
// 1 x len(attrs) mat.Dense, restricted to the given Attributes, and
// returns them in row order.
//
// An error is returned unless every Attribute is a FloatAttribute.
func ConvertAllRowsToMat64(attrs []Attribute, f FixedDataGrid) ([]*mat.Dense, error) {
	if err := checkAllAttributesAreFloat(attrs); err != nil {
		return nil, err
	}

	// One output vector per row of the grid
	_, rowCount := f.Size()
	out := make([]*mat.Dense, rowCount)

	specs := ResolveAttributes(f, attrs)
	width := len(attrs)

	for row := range out {
		vec := mat.NewDense(1, width, nil)
		for col, spec := range specs {
			vec.Set(0, col, UnpackBytesToFloat(f.Get(spec, row)))
		}
		out[row] = vec
	}
	return out, nil
}
================================================
FILE: base/csv.go
================================================
package base
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"regexp"
"runtime"
"strings"
)
// ParseCSVGetRowsFromReader rewinds r and returns the number of CSV
// records it contains (a header line, if present, is counted too).
//
// An error is returned if r cannot be rewound or if the CSV content is
// malformed; the count is 0 in that case.
func ParseCSVGetRowsFromReader(r io.ReadSeeker) (int, error) {
	// Counting from anywhere but the start would silently under-report
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return 0, err
	}
	reader := csv.NewReader(r)
	counter := 0
	for {
		_, err := reader.Read()
		if err == io.EOF {
			return counter, nil
		}
		if err != nil {
			return 0, err
		}
		counter++
	}
}
// ParseCSVEstimateFilePrecisionFromReader estimates the maximum number of
// digits occurring after a decimal point within the reader.
//
// Only the first six lines which are non-empty and do not start with '@'
// or '%' are inspected. The reader is rewound first, and an error is
// returned if that fails.
func ParseCSVEstimateFilePrecisionFromReader(r io.ReadSeeker) (int, error) {
	// The dot is escaped so that only a literal decimal point joins the
	// integer and fractional digits. Unescaped, it also matches the field
	// separator ("5,3.25" would be read as "5,3" followed by "25"), which
	// hides the real fractional part.
	rexp := regexp.MustCompile(`[0-9]+(\.[0-9]+)?`)

	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return 0, err
	}

	// Scan through the file line-by-line
	maxL := 0
	lineCount := 0
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		if lineCount > 5 {
			break
		}
		line := scanner.Text()
		if len(line) == 0 || line[0] == '@' || line[0] == '%' {
			continue
		}
		for _, m := range rexp.FindAllString(line, -1) {
			dot := strings.IndexByte(m, '.')
			if dot < 0 {
				continue
			}
			if l := len(m) - dot - 1; l > maxL {
				maxL = l
			}
		}
		lineCount++
	}
	// Scanner errors (e.g. an over-long line) are deliberately not
	// reported: this is a best-effort estimate and callers treat any
	// error as fatal.
	return maxL, nil
}
// ParseCSVGetAttributesFromReader sniffs the Attribute types and names
// from the reader and returns the typed Attributes, in column order, with
// their names set.
func ParseCSVGetAttributesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute {
	typed := ParseCSVSniffAttributeTypesFromReader(r, hasHeaders)
	labels := ParseCSVSniffAttributeNamesFromReader(r, hasHeaders)
	for col := range typed {
		typed[col].SetName(labels[col])
	}
	return typed
}
// ParseCSVSniffAttributeNamesFromReader returns one name per column of
// the first CSV record in the reader: the whitespace-trimmed field when
// hasHeaders is true, or the column index ("0", "1", ...) otherwise.
//
// IMPORTANT: panics if the first record cannot be read.
func ParseCSVSniffAttributeNamesFromReader(r io.ReadSeeker, hasHeaders bool) []string {
	r.Seek(0, 0)
	firstRow, readErr := csv.NewReader(r).Read()
	if readErr != nil {
		panic(readErr)
	}
	for col, cell := range firstRow {
		if hasHeaders {
			firstRow[col] = strings.TrimSpace(cell)
		} else {
			firstRow[col] = fmt.Sprintf("%d", col)
		}
	}
	return firstRow
}
// ParseCSVSniffAttributeTypesFromReader returns a slice of
// appropriately-typed (but unnamed) Attributes, one per column.
//
// The type of a given attribute is determined by looking at the first data
// row of the CSV: a column whose entry looks like a number becomes a
// FloatAttribute, anything else becomes a CategoricalAttribute. Every
// FloatAttribute is given the precision estimated by
// ParseCSVEstimateFilePrecisionFromReader.
//
// IMPORTANT: panics if the header or first data row cannot be read, or if
// the precision estimate fails.
func ParseCSVSniffAttributeTypesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute {
	// Compiled once per call rather than once per column
	numeric := regexp.MustCompile("^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$")

	// Create the CSV reader
	r.Seek(0, 0)
	reader := csv.NewReader(r)
	if hasHeaders {
		// Skip the headers
		if _, err := reader.Read(); err != nil {
			panic(err)
		}
	}

	// Read the first data line of the file
	columns, err := reader.Read()
	if err != nil {
		panic(err)
	}

	attrs := make([]Attribute, 0, len(columns))
	for _, entry := range columns {
		// Match the Attribute type with the regular expression
		if numeric.MatchString(strings.Trim(entry, " ")) {
			attrs = append(attrs, NewFloatAttribute(""))
		} else {
			attrs = append(attrs, new(CategoricalAttribute))
		}
	}

	// Estimate file precision
	maxP, err := ParseCSVEstimateFilePrecisionFromReader(r)
	if err != nil {
		panic(err)
	}
	for _, a := range attrs {
		if f, ok := a.(*FloatAttribute); ok {
			f.Precision = maxP
		}
	}
	return attrs
}
// ParseCSVBuildInstancesFromReader fills an UpdatableDataGrid with the
// CSV records read from r.
//
// attrs lists the Attributes in column order; each must already be present
// in u. When hasHeader is true the first record is skipped. Empty fields
// are treated as missing values and left untouched.
//
// A panic raised while converting or storing a value is turned into an
// error which mentions the offending data row. Runtime errors (such as an
// out-of-range index) indicate a programming mistake and are re-panicked
// with their original value.
func ParseCSVBuildInstancesFromReader(r io.ReadSeeker, attrs []Attribute, hasHeader bool, u UpdatableDataGrid) (err error) {
	var rowCounter int

	defer func() {
		rec := recover()
		if rec == nil {
			return
		}
		if _, ok := rec.(runtime.Error); ok {
			// Propagate the original failure rather than the (nil) result
			panic(rec)
		}
		// %v copes with panic values which are not errors
		err = fmt.Errorf("error at line %d (error %v)", rowCounter, rec)
	}()

	specs := ResolveAttributes(u, attrs)
	if _, seekErr := r.Seek(0, io.SeekStart); seekErr != nil {
		return seekErr
	}
	reader := csv.NewReader(r)

	for {
		record, readErr := reader.Read()
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			return readErr
		}
		if hasHeader {
			// Only the very first record can be the header
			hasHeader = false
			continue
		}
		for i, v := range record {
			// support missing values
			if v == "" {
				continue
			}
			u.Set(specs[i], rowCounter, specs[i].attr.GetSysValFromString(strings.TrimSpace(v)))
		}
		rowCounter++
	}

	return nil
}
// ParseCSVToInstancesFromReader reads the reader containing CSV and
// returns the read Instances.
//
// Attribute types and names are sniffed from the content, and the last
// column is marked as the class Attribute. Set hasHeaders when the first
// record holds the column names.
//
// An error is returned if the rows cannot be counted, storage cannot be
// allocated, a value cannot be stored, or the class Attribute cannot be
// registered.
func ParseCSVToInstancesFromReader(r io.ReadSeeker, hasHeaders bool) (instances *DenseInstances, err error) {
	// Read the number of rows in the file
	rowCount, err := ParseCSVGetRowsFromReader(r)
	if err != nil {
		return nil, err
	}
	if hasHeaders {
		rowCount--
	}

	// Read the row headers
	attrs := ParseCSVGetAttributesFromReader(r, hasHeaders)

	// Allocate the Instances to return
	instances = NewDenseInstances()
	for _, a := range attrs {
		instances.AddAttribute(a)
	}
	if err = instances.Extend(rowCount); err != nil {
		return nil, err
	}

	if err = ParseCSVBuildInstancesFromReader(r, attrs, hasHeaders, instances); err != nil {
		return nil, err
	}

	// The last column is the class by convention
	if err = instances.AddClassAttribute(attrs[len(attrs)-1]); err != nil {
		return nil, err
	}

	return instances, nil
}
// ParseMatchAttributes tries to match the set of Attributes read from one
// file with those read from another, and writes the matching Attributes
// back to the original set.
//
// An entry of attrs is replaced by a template Attribute when the two are
// Equal or share a name. If several template Attributes match, the last
// one wins.
func ParseMatchAttributes(attrs, templateAttrs []Attribute) {
	for i := range attrs {
		// Matching is always done against the Attribute originally read
		current := attrs[i]
		for _, candidate := range templateAttrs {
			if current.Equals(candidate) || current.GetName() == candidate.GetName() {
				attrs[i] = candidate
			}
		}
	}
}
// ParseCSVToTemplatedInstancesFromReader reads the reader containing CSV
// and returns the read Instances, using another already read
// DenseInstances as a template.
//
// Attributes sniffed from the reader are matched against the template's
// Attributes (see ParseMatchAttributes), and the template's class
// Attributes are carried over to the result.
//
// An error is returned if the rows cannot be counted, storage cannot be
// allocated, a value cannot be stored, or a class Attribute cannot be
// registered.
func ParseCSVToTemplatedInstancesFromReader(r io.ReadSeeker, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error) {
	// Read the number of rows in the file
	rowCount, err := ParseCSVGetRowsFromReader(r)
	if err != nil {
		return nil, err
	}
	if hasHeaders {
		rowCount--
	}

	// Read the row headers
	attrs := ParseCSVGetAttributesFromReader(r, hasHeaders)
	templateAttrs := template.AllAttributes()
	ParseMatchAttributes(attrs, templateAttrs)

	// Allocate the Instances to return
	instances = CopyDenseInstances(template, templateAttrs)
	if err = instances.Extend(rowCount); err != nil {
		return nil, err
	}

	if err = ParseCSVBuildInstancesFromReader(r, attrs, hasHeaders, instances); err != nil {
		return nil, err
	}

	for _, a := range template.AllClassAttributes() {
		if err = instances.AddClassAttribute(a); err != nil {
			return nil, err
		}
	}

	return instances, nil
}
// ParseCSVToInstancesWithAttributeGroupsFromReader reads the CSV content
// of r and returns the read DenseInstances, placing Attributes into named
// AttributeGroups.
//
// attrGroups and classAttrGroups map an Attribute name to the name of the
// AttributeGroup it belongs to; Attributes named in classAttrGroups are
// also marked as class Attributes. attrOverrides replaces the sniffed
// Attribute of a column, keyed by column index. Attributes not mentioned
// in either map go into a default group. A group is created as a bit
// group (size 0) or an 8-byte group depending on whether the last
// Attribute assigned to it is a BinaryAttribute.
//
// When hasHeaders is true the header record is not counted as a data row.
// Failures while creating groups, adding Attributes, allocating storage or
// storing values are returned as errors.
func ParseCSVToInstancesWithAttributeGroupsFromReader(r io.ReadSeeker, attrGroups, classAttrGroups map[string]string, attrOverrides map[int]Attribute, hasHeaders bool) (instances *DenseInstances, err error) {
	// Read row count
	rowCount, err := ParseCSVGetRowsFromReader(r)
	if err != nil {
		return nil, err
	}
	if hasHeaders {
		// The header line is not a data row; counting it would allocate
		// a spurious, zero-filled trailing row
		rowCount--
	}

	// Read the row headers
	attrs := ParseCSVGetAttributesFromReader(r, hasHeaders)
	for i := range attrs {
		if a, ok := attrOverrides[i]; ok {
			attrs[i] = a
		}
	}

	// Allocate the Instances to return
	instances = NewDenseInstances()

	// Work out which AttributeGroups are needed, and for which Attributes
	agsToCreate := make(map[string]int)
	combinedAgs := make(map[string]string)
	for name, group := range attrGroups {
		agsToCreate[group] = 0
		combinedAgs[name] = group
	}
	for name, group := range classAttrGroups {
		agsToCreate[group] = 8
		combinedAgs[name] = group
	}

	// Decide the sizes
	for _, a := range attrs {
		ag, ok := combinedAgs[a.GetName()]
		if !ok {
			continue
		}
		if _, isBinary := a.(*BinaryAttribute); isBinary {
			agsToCreate[ag] = 0
		} else {
			agsToCreate[ag] = 8
		}
	}

	// Create them
	for name, size := range agsToCreate {
		if err = instances.CreateAttributeGroup(name, size); err != nil {
			return nil, err
		}
	}

	// Add the Attributes to them
	for _, a := range attrs {
		if ag, ok := combinedAgs[a.GetName()]; ok {
			if _, err = instances.AddAttributeToAttributeGroup(a, ag); err != nil {
				return nil, err
			}
		} else {
			instances.AddAttribute(a)
		}
		if _, ok := classAttrGroups[a.GetName()]; ok {
			if err = instances.AddClassAttribute(a); err != nil {
				return nil, err
			}
		}
	}

	// Allocate
	if err = instances.Extend(rowCount); err != nil {
		return nil, err
	}

	if err = ParseCSVBuildInstancesFromReader(r, attrs, hasHeaders, instances); err != nil {
		return nil, err
	}

	return instances, nil
}
================================================
FILE: base/csv_test.go
================================================
package base
import (
"testing"
. "github.com/smartystreets/goconvey/convey"
)
// TestParseCSVGetRows checks the row count reported for CSV files with
// and without a header row, and the error for a missing file.
func TestParseCSVGetRows(t *testing.T) {
	Convey("Getting the number of rows for a CSV file", t, func() {
		Convey("With a valid file path", func() {
			const dataRows = 150

			Convey("When the CSV file doesn't have a header row", func() {
				got, err := ParseCSVGetRows("../examples/datasets/iris.csv")
				So(err, ShouldBeNil)

				Convey("It counts the correct number of rows", func() {
					So(got, ShouldEqual, dataRows)
				})
			})

			Convey("When the CSV file has a header row", func() {
				got, err := ParseCSVGetRows("../examples/datasets/iris_headers.csv")
				So(err, ShouldBeNil)

				Convey("It counts the correct number of rows, *including* the header row", func() {
					So(got, ShouldEqual, dataRows+1)
				})
			})
		})

		Convey("With a path to a non-existent file", func() {
			_, missingErr := ParseCSVGetRows("../examples/datasets/non-existent.csv")

			Convey("It returns an error", func() {
				So(missingErr, ShouldNotBeNil)
			})
		})
	})
}
// TestParseCSVGetAttributes checks the types and names sniffed for the
// first and last columns of the iris dataset with headers.
func TestParseCSVGetAttributes(t *testing.T) {
	Convey("Getting the attributes in the headers of a CSV file", t, func() {
		parsed := ParseCSVGetAttributes("../examples/datasets/iris_headers.csv", true)
		firstCol, lastCol := parsed[0], parsed[4]

		Convey("It gets the correct types for the headers based on the column values", func() {
			So(firstCol.GetType(), ShouldEqual, Float64Type)
			So(lastCol.GetType(), ShouldEqual, CategoricalType)
		})

		Convey("It gets the correct attribute names", func() {
			So(firstCol.GetName(), ShouldEqual, "Sepal length")
			So(lastCol.GetName(), ShouldEqual, "Species")
		})
	})
}
// TestParseCSVSniffAttributeTypes checks the type sniffed for each of the
// five columns of the iris dataset with headers.
func TestParseCSVSniffAttributeTypes(t *testing.T) {
	Convey("Getting just the attribute types for the columns in the CSV", t, func() {
		sniffed := ParseCSVSniffAttributeTypes("../examples/datasets/iris_headers.csv", true)

		Convey("It gets the correct types", func() {
			wantTypes := []int{Float64Type, Float64Type, Float64Type, Float64Type, CategoricalType}
			for col, want := range wantTypes {
				So(sniffed[col].GetType(), ShouldEqual, want)
			}
		})
	})
}
// TestParseCSVSniffAttributeNames checks the name sniffed for each of the
// five columns of the iris dataset with headers.
func TestParseCSVSniffAttributeNames(t *testing.T) {
	Convey("Getting just the attribute name for the columns in the CSV", t, func() {
		sniffed := ParseCSVSniffAttributeNames("../examples/datasets/iris_headers.csv", true)

		Convey("It gets the correct names", func() {
			wantNames := []string{"Sepal length", "Sepal width", "Petal length", "Petal width", "Species"}
			for col, want := range wantNames {
				So(sniffed[col], ShouldEqual, want)
			}
		})
	})
}
// TestParseCSVToInstances parses several example datasets and checks row
// contents, attribute types and the error for a missing file.
func TestParseCSVToInstances(t *testing.T) {
	Convey("Parsing a CSV file to Instances", t, func() {
		Convey("Given a path to a reasonable CSV file", func() {
			parsed, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
			So(err, ShouldBeNil)

			Convey("Should parse the rows correctly", func() {
				expectedRows := []struct {
					index int
					text  string
				}{
					{0, "5.1 3.5 1.4 0.2 Iris-setosa"},
					{50, "7.0 3.2 4.7 1.4 Iris-versicolor"},
					{100, "6.3 3.3 6.0 2.5 Iris-virginica"},
				}
				for _, row := range expectedRows {
					So(parsed.RowString(row.index), ShouldEqual, row.text)
				}
			})
		})

		Convey("Given a path to another reasonable CSV file", func() {
			_, err := ParseCSVToInstances("../examples/datasets/c45-numeric.csv", true)
			So(err, ShouldBeNil)
		})

		Convey("Given a path to a non-existent file", func() {
			_, missingErr := ParseCSVToInstances("../examples/datasets/non-existent.csv", true)

			Convey("It should return an error", func() {
				So(missingErr, ShouldNotBeNil)
			})
		})

		Convey("Given a path to a CSV file with awkward data", func() { // what's so awkward about it?
			parsed, err := ParseCSVToInstances("../examples/datasets/chim.csv", true)
			So(err, ShouldBeNil)

			Convey("It parses the data correctly, assigning the correct types to attributes", func() {
				columns := parsed.AllAttributes()
				So(columns[0].GetType(), ShouldEqual, Float64Type)
				So(columns[1].GetType(), ShouldEqual, CategoricalType)
			})
		})
	})
}
================================================
FILE: base/data.go
================================================
package base
// SortDirection selects the order in which Instances are sorted.
type SortDirection int

// The zero value is deliberately not a valid SortDirection: numbering
// starts at 1.
const (
	// Descending sorts Instances from high to low.
	Descending SortDirection = iota + 1
	// Ascending sorts Instances from low to high.
	Ascending
)
// DataGrid implementations represent data addressable by rows and columns.
type DataGrid interface {
	// GetAttribute retrieves a given Attribute's specification.
	GetAttribute(Attribute) (AttributeSpec, error)
	// AllAttributes retrieves details of every Attribute.
	AllAttributes() []Attribute
	// AddClassAttribute marks an Attribute as a class Attribute.
	AddClassAttribute(Attribute) error
	// RemoveClassAttribute unmarks an Attribute as a class Attribute.
	RemoveClassAttribute(Attribute) error
	// AllClassAttributes returns details of all class Attributes.
	AllClassAttributes() []Attribute
	// Get returns the bytes at a given position (Attribute and row),
	// or nil.
	Get(AttributeSpec, int) []byte
	// MapOverRows is a convenience function for iteration: the callback
	// receives the bytes of the requested Attributes and a row number.
	MapOverRows([]AttributeSpec, func([][]byte, int) (bool, error)) error
}
// FixedDataGrid implementations have a size known in advance and implement
// all of the functionality offered by DataGrid implementations.
type FixedDataGrid interface {
	DataGrid
	// RowString returns a string representation of a given row.
	RowString(int) string
	// Size returns the number of Attributes and rows currently allocated,
	// in that order.
	Size() (int, int)
}
// UpdatableDataGrid implementations can be changed in addition to implementing
// all of the functionality offered by FixedDataGrid implementations.
type UpdatableDataGrid interface {
	FixedDataGrid
	// Set sets a given Attribute and row to a byte sequence.
	Set(AttributeSpec, int, []byte)
	// AddAttribute adds an Attribute to the grid and returns the
	// specification to use when addressing it.
	AddAttribute(Attribute) AttributeSpec
	// Extend allocates additional room to hold a number of rows.
	Extend(int) error
}
================================================
FILE: base/dataframe_go.go
================================================
package base
import (
"fmt"
"reflect"
"strconv"
"github.com/rocketlaunchr/dataframe-go"
)
// ConvertDataFrameToInstances converts a dataframe-go DataFrame into a
// golearn FixedDataGrid, allowing dataframes to be used with golearn's ML
// models.
//
// df is the DataFrame to convert and classAttrIndex is the index of the
// column to mark as the class Attribute. A column whose first value is a
// string becomes a CategoricalAttribute; every other column becomes a
// FloatAttribute.
//
// Floating-point values are converted with the shortest representation
// that parses back to the same number, so no precision is lost on the way
// in. Values of other non-string types are formatted with fmt.Sprint, and
// nil values are passed on as the empty string.
func ConvertDataFrameToInstances(df *dataframe.DataFrame, classAttrIndex int) FixedDataGrid {
	// Creating Attributes based on Dataframe
	names := df.Names()
	attrs := make([]Attribute, len(names))

	newInst := NewDenseInstances()

	for i := range names {
		col := df.Series[i]
		if reflect.TypeOf(col.Value(0)).Kind() == reflect.String {
			attrs[i] = new(CategoricalAttribute)
			attrs[i].SetName(names[i])
		} else {
			attrs[i] = NewFloatAttribute(names[i])
		}
	}

	// Add the attributes
	newSpecs := make([]AttributeSpec, len(attrs))
	for i, a := range attrs {
		newSpecs[i] = newInst.AddAttribute(a)
	}

	// Adding the class attribute
	newInst.AddClassAttribute(attrs[classAttrIndex])

	// Allocate space
	nRows := df.NRows()
	newInst.Extend(nRows)

	// Write the data based on DataType
	for i := 0; i < nRows; i++ {
		for j := range names {
			col := df.Series[j]

			var val string
			switch v := col.Value(i).(type) {
			case nil:
				// Missing value: val stays empty
			case string:
				val = v
			case int64:
				val = strconv.FormatInt(v, 10)
			case float64:
				// "%f" would truncate to six decimal places
				val = strconv.FormatFloat(v, 'g', -1, 64)
			case float32:
				val = strconv.FormatFloat(float64(v), 'g', -1, 32)
			default:
				// Other types (e.g. int, bool) use their default formatting
				val = fmt.Sprint(v)
			}

			newInst.Set(newSpecs[j], i, newSpecs[j].GetAttribute().GetSysValFromString(val))
		}
	}

	return newInst
}
================================================
FILE: base/dense.go
================================================
package base
import (
"bytes"
"fmt"
"os"
"sync"
)
// DenseInstances stores each Attribute value explicitly
// in a large grid.
type DenseInstances struct {
	// agMap maps an AttributeGroup name to its index in ags.
	agMap map[string]int
	// agRevMap maps an index in ags back to the AttributeGroup name.
	agRevMap map[int]string
	// ags holds the AttributeGroups in creation order.
	ags []AttributeGroup
	// lock is acquired by most exported methods before they touch the
	// fields of this struct.
	lock sync.Mutex
	// fixed, once true, makes attempts to add Attributes or
	// AttributeGroups panic.
	fixed bool
	// classAttrs records, per AttributeSpec, whether it is currently a
	// class Attribute.
	classAttrs map[AttributeSpec]bool
	// NOTE(review): presumably the number of rows allocated so far —
	// confirm against Extend.
	maxRow int
	// attributes lists every Attribute in the order it was added.
	attributes []Attribute
	// tmpAttrAgMap names the AttributeGroup an Attribute should be placed
	// in; AddAttribute consults it before choosing a default group.
	tmpAttrAgMap map[Attribute]string
	// Counters for each AttributeGroup type
	floatRowSizeBytes int
	catRowSizeBytes   int
	binRowSizeBits    int
}
// NewDenseInstances returns an empty DenseInstances: no Attributes, no
// AttributeGroups, no rows, and every map initialised and ready for use.
func NewDenseInstances() *DenseInstances {
	return &DenseInstances{
		agMap:             make(map[string]int),
		agRevMap:          make(map[int]string),
		ags:               make([]AttributeGroup, 0),
		lock:              sync.Mutex{},
		fixed:             false,
		classAttrs:        make(map[AttributeSpec]bool),
		maxRow:            0,
		attributes:        make([]Attribute, 0),
		tmpAttrAgMap:      make(map[Attribute]string),
		floatRowSizeBytes: 0,
		catRowSizeBytes:   0,
		binRowSizeBits:    0,
	}
}
// copyFixedDataGridStructure builds an empty DenseInstances with the same
// Attributes and class Attributes as of.
//
// It returns the new set, the AttributeSpecs of each Attribute in of, and
// the AttributeSpecs of the same Attributes in the new set (both in the
// order of of.AllAttributes()).
//
// IMPORTANT: panics if of cannot resolve one of its own Attributes.
func copyFixedDataGridStructure(of FixedDataGrid) (*DenseInstances, []AttributeSpec, []AttributeSpec) {
	skeleton := NewDenseInstances()

	// Mirror every Attribute, remembering where it lives on both sides
	srcAttrs := of.AllAttributes()
	srcSpecs := make([]AttributeSpec, len(srcAttrs))
	dstSpecs := make([]AttributeSpec, len(srcAttrs))
	for idx := range srcAttrs {
		attr := srcAttrs[idx]
		spec, err := of.GetAttribute(attr)
		if err != nil {
			panic(err)
		}
		srcSpecs[idx] = spec
		dstSpecs[idx] = skeleton.AddAttribute(attr)
	}

	// Carry the class Attributes over
	for _, classAttr := range of.AllClassAttributes() {
		skeleton.AddClassAttribute(classAttr)
	}

	return skeleton, srcSpecs, dstSpecs
}
// NewStructuralCopy returns an empty DenseInstances which has the same
// Attributes and class Attributes as an existing FixedDataGrid, but holds
// no data.
func NewStructuralCopy(of FixedDataGrid) *DenseInstances {
	skeleton, _, _ := copyFixedDataGridStructure(of)
	return skeleton
}
// NewDenseCopy returns a new DenseInstances holding the same Attributes,
// class Attributes and row values as an existing FixedDataGrid.
func NewDenseCopy(of FixedDataGrid) *DenseInstances {
	dst, srcSpecs, dstSpecs := copyFixedDataGridStructure(of)

	// Make room for every source row
	_, rowCount := of.Size()
	dst.Extend(rowCount)

	// Transfer one row at a time, cell by cell
	copyRow := func(cells [][]byte, row int) (bool, error) {
		for col := range cells {
			dst.Set(dstSpecs[col], row, cells[col])
		}
		return true, nil
	}
	of.MapOverRows(srcSpecs, copyRow)

	return dst
}
//
// AttributeGroup functions
//
// createAttributeGroup registers a new, empty AttributeGroup under name.
// A size of 0 creates a BinaryAttributeGroup; any other size creates a
// FixedAttributeGroup of that size.
//
// IMPORTANT: do not call unless you've acquired the lock. Panics if the
// instances have been marked as fixed.
func (inst *DenseInstances) createAttributeGroup(name string, size int) {
	if inst.fixed {
		panic("Can't add additional Attributes")
	}

	// Create the AttributeGroup information
	var group AttributeGroup
	if size == 0 {
		bits := new(BinaryAttributeGroup)
		bits.parent = inst
		bits.attributes = make([]Attribute, 0)
		bits.size = size
		bits.alloc = make([]byte, 0)
		group = bits
	} else {
		fixedWidth := new(FixedAttributeGroup)
		fixedWidth.parent = inst
		fixedWidth.attributes = make([]Attribute, 0)
		fixedWidth.size = size
		fixedWidth.alloc = make([]byte, 0)
		group = fixedWidth
	}

	// The new group takes the next free slot; index both directions
	slot := len(inst.ags)
	inst.agMap[name] = slot
	inst.agRevMap[slot] = name
	inst.ags = append(inst.ags, group)
}
// CreateAttributeGroup adds a new AttributeGroup with the given name to
// this set of instances. If size is 0 a bit group is added; otherwise
// each Attribute in the group occupies size bytes.
//
// A panic raised while creating the group is recovered and returned as an
// error.
func (inst *DenseInstances) CreateAttributeGroup(name string, size int) (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		if asErr, isErr := r.(error); isErr {
			err = asErr
			return
		}
		err = fmt.Errorf("CreateAttributeGroup: %v (not created)", r)
	}()

	inst.lock.Lock()
	defer inst.lock.Unlock()

	inst.createAttributeGroup(name, size)
	return nil
}
// AllAttributeGroups returns a fresh map from AttributeGroup name to
// AttributeGroup. The map is a copy; the groups themselves are shared.
func (inst *DenseInstances) AllAttributeGroups() map[string]AttributeGroup {
	groups := make(map[string]AttributeGroup)
	for name, slot := range inst.agMap {
		groups[name] = inst.ags[slot]
	}
	return groups
}
// GetAttributeGroup returns the AttributeGroup registered under name, or
// an error if no group of that name exists.
func (inst *DenseInstances) GetAttributeGroup(name string) (AttributeGroup, error) {
	inst.lock.Lock()
	defer inst.lock.Unlock()

	slot, exists := inst.agMap[name]
	if !exists {
		return nil, fmt.Errorf("AttributeGroup '%s' doesn't exist", name)
	}
	return inst.ags[slot], nil
}
//
// Attribute creation and handling
//
// AddAttribute adds an Attribute to this set of DenseInstances and
// returns an AttributeSpec for subsequent Set() calls.
//
// If the Attribute has a pre-assigned group in tmpAttrAgMap that group is
// used. Otherwise a default group is chosen from the Attribute's type
// ("CAT<n>", "FLOAT<n>" or "BIN<n>"), where n grows with the accumulated
// row size of that type measured in pages. The group is created on demand.
//
// IMPORTANT: will panic if storage has been allocated via Extend, or if
// the Attribute type is not recognised.
func (inst *DenseInstances) AddAttribute(a Attribute) AttributeSpec {
	inst.lock.Lock()
	defer inst.lock.Unlock()

	if inst.fixed {
		panic("Can't add additional Attributes")
	}

	// Every group is 8 bytes wide except the default binary groups
	groupSize := 8

	groupName, preassigned := inst.tmpAttrAgMap[a]
	if !preassigned {
		// Generate a default AttributeGroup name
		pageSize := os.Getpagesize()
		switch a.(type) {
		case *CategoricalAttribute:
			inst.catRowSizeBytes += 8
			groupName = fmt.Sprintf("CAT%d", inst.catRowSizeBytes/pageSize)
		case *FloatAttribute:
			inst.floatRowSizeBytes += 8
			groupName = fmt.Sprintf("FLOAT%d", inst.floatRowSizeBytes/pageSize)
		case *BinaryAttribute:
			inst.binRowSizeBits++
			groupName = fmt.Sprintf("BIN%d", (inst.binRowSizeBits/8)/pageSize)
			groupSize = 0
		default:
			panic("Unrecognised Attribute type")
		}
	}

	// Create the group if it doesn't exist
	if _, exists := inst.agMap[groupName]; !exists {
		inst.createAttributeGroup(groupName, groupSize)
	}

	groupID := inst.agMap[groupName]
	group := inst.ags[groupID]
	group.AddAttribute(a)
	inst.attributes = append(inst.attributes, a)
	return AttributeSpec{groupID, len(group.Attributes()) - 1, a}
}
// AddAttributeToAttributeGroup adds an Attribute to the AttributeGroup
// named ag and returns its AttributeSpec.
//
// An error is returned if the group doesn't exist, or if the new Attribute
// is not Compatible with an Attribute already in the group.
func (inst *DenseInstances) AddAttributeToAttributeGroup(newAttribute Attribute, ag string) (AttributeSpec, error) {
	inst.lock.Lock()
	defer inst.lock.Unlock()

	// Check if the ag exists
	groupID, exists := inst.agMap[ag]
	if !exists {
		return AttributeSpec{-1, 0, nil}, fmt.Errorf("AttributeGroup '%s' doesn't exist. Call CreateAttributeGroup() first", ag)
	}

	// Every existing member must accept the newcomer
	group := inst.ags[groupID]
	for pos, existing := range group.Attributes() {
		if existing.Compatible(newAttribute) {
			continue
		}
		return AttributeSpec{-1, 0, nil}, fmt.Errorf("Attribute %s is not Compatible with %s in pond '%s' (position %d)", newAttribute, existing, ag, pos)
	}

	group.AddAttribute(newAttribute)
	inst.attributes = append(inst.attributes, newAttribute)
	return AttributeSpec{groupID, len(group.Attributes()) - 1, newAttribute}, nil
}
// GetAttribute searches every AttributeGroup, in order, for an Attribute
// which Equals the argument and returns the AttributeSpec of the first
// match. An error is returned if there is none.
//
// TODO: Write a function to pre-compute this once we've allocated
// TODO: Write a utility function which retrieves all AttributeSpecs for
// a given instance set.
func (inst *DenseInstances) GetAttribute(get Attribute) (AttributeSpec, error) {
	inst.lock.Lock()
	defer inst.lock.Unlock()

	for groupIdx := range inst.ags {
		members := inst.ags[groupIdx].Attributes()
		for pos := range members {
			if members[pos].Equals(get) {
				return AttributeSpec{groupIdx, pos, members[pos]}, nil
			}
		}
	}

	return AttributeSpec{-1, 0, nil}, fmt.Errorf("Couldn't resolve %s", get)
}
// AllAttributes collects the Attributes of every AttributeGroup into a
// single (never nil) slice.
func (inst *DenseInstances) AllAttributes() []Attribute {
	inst.lock.Lock()
	defer inst.lock.Unlock()

	all := make([]Attribute, 0)
	for _, group := range inst.ags {
		all = append(all, group.Attributes()...)
	}
	return all
}
// AddClassAttribute marks the given Attribute as a class Attribute.
// The error from GetAttribute is returned if the Attribute can't be resolved.
func (inst *DenseInstances) AddClassAttribute(a Attribute) error {
	// GetAttribute takes the lock itself, so resolve before locking here
	spec, err := inst.GetAttribute(a)
	if err == nil {
		inst.lock.Lock()
		defer inst.lock.Unlock()
		inst.classAttrs[spec] = true
	}
	return err
}
// RemoveClassAttribute clears the class flag of the given Attribute.
// The error from GetAttribute is returned if the Attribute can't be resolved.
func (inst *DenseInstances) RemoveClassAttribute(a Attribute) error {
	// GetAttribute takes the lock itself, so resolve before locking here
	spec, err := inst.GetAttribute(a)
	if err == nil {
		inst.lock.Lock()
		defer inst.lock.Unlock()
		// The entry is kept but flagged false rather than deleted
		inst.classAttrs[spec] = false
	}
	return err
}
// AllClassAttributes returns the Attributes currently flagged as class
// Attributes. It holds the lock while delegating to allClassAttributes.
func (inst *DenseInstances) AllClassAttributes() []Attribute {
	inst.lock.Lock()
	defer inst.lock.Unlock()

	classes := inst.allClassAttributes()
	return classes
}
// allClassAttributes returns the Attributes whose class flag is set.
// It doesn't lock, so callers must already hold the lock. The result is nil
// when no Attribute is flagged.
func (inst *DenseInstances) allClassAttributes() []Attribute {
	var classes []Attribute
	for spec, isClass := range inst.classAttrs {
		if !isClass {
			continue
		}
		classes = append(classes, spec.attr)
	}
	return classes
}
//
// Allocation functions
//
// realiseAttributeGroups places every Attribute recorded in tmpAttrAgMap
// into the AttributeGroup it was mapped to, creating the group first if it
// doesn't exist yet.
//
// It doesn't lock; it is called from Extend, which holds the lock. The first
// error returned by a group's AddAttribute aborts the process and is returned.
func (inst *DenseInstances) realiseAttributeGroups() error {
	for a, ag := range inst.tmpAttrAgMap {
		// NOTE: class Attributes were once meant to get their own
		// "CLASS_"-prefixed groups; that was never enabled, so class and
		// non-class Attributes are treated alike here.

		// Create the ag if it doesn't exist. The kind of group has to be
		// derived from the Attribute itself: a missing map entry yields
		// index 0, which says nothing about the group being created (and
		// indexing inst.ags with it panics while no group exists).
		if _, exists := inst.agMap[ag]; !exists {
			size := 8
			if _, isBinary := a.(*BinaryAttribute); isBinary {
				// Same convention as AddAttribute: binary groups use size 0
				size = 0
			}
			inst.createAttributeGroup(ag, size)
		}

		group := inst.ags[inst.agMap[ag]]
		if err := group.AddAttribute(a); err != nil {
			return err
		}
	}
	return nil
}
// Extend grows the storage of every AttributeGroup so that the given number
// of additional rows can be stored. It's recommended to set rows to
// something quite large.
//
// The first call also fixes the group layout (via realiseAttributeGroups);
// an error from that step is returned before anything is allocated.
func (inst *DenseInstances) Extend(rows int) error {
	inst.lock.Lock()
	defer inst.lock.Unlock()

	// The layout is only decided once, before the first allocation
	if !inst.fixed {
		if err := inst.realiseAttributeGroups(); err != nil {
			return err
		}
	}

	for _, group := range inst.ags {
		// Extra bytes needed = additional rows * bytes per row in this group
		group.resize(rows * group.RowSizeInBytes())
	}

	inst.fixed = true
	inst.maxRow += rows
	return nil
}
// Set stores val for the Attribute described by the AttributeSpec on the
// given row, by delegating to the AttributeGroup the spec points at.
//
// AttributeSpecs can be obtained using GetAttribute() or AddAttribute().
//
// IMPORTANT: Will panic() if the AttributeSpec isn't valid
//
// IMPORTANT: Will panic() if the row is too large
//
// IMPORTANT: Will panic() if the val is not the right length
func (inst *DenseInstances) Set(a AttributeSpec, row int, val []byte) {
	group := inst.ags[a.pond]
	group.set(a.position, row, val)
}
// Get fetches the bytes stored for the Attribute described by the
// AttributeSpec on the given row, by delegating to the AttributeGroup the
// spec points at.
// AttributeSpecs can be obtained using GetAttribute() or AddAttribute()
func (inst *DenseInstances) Get(a AttributeSpec, row int) []byte {
	group := inst.ags[a.pond]
	return group.get(a.position, row)
}
// RowString renders the given row as text: each AttributeGroup appends its
// own values, and consecutive groups are separated by a single space.
func (inst *DenseInstances) RowString(row int) string {
	var buffer bytes.Buffer
	needSeparator := false
	for _, group := range inst.ags {
		if needSeparator {
			buffer.WriteString(" ")
		}
		needSeparator = true
		group.appendToRowBuf(row, &buffer)
	}
	return buffer.String()
}
// MapOverRows calls mapFunc once per row, in row order.
//
// asv lists the AttributeSpecs whose values are needed, in the order the
// function expects them; mapFunc receives those values and the row number.
// The same buffer is reused for every row. Iteration stops early, without
// error, when mapFunc returns false; an error from mapFunc is returned as is.
func (inst *DenseInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error {
	fields := make([][]byte, len(asv))
	for row := 0; row < inst.maxRow; row++ {
		for col := range asv {
			spec := asv[col]
			fields[col] = inst.ags[spec.pond].get(spec.position, row)
		}
		keepGoing, err := mapFunc(fields, row)
		if err != nil {
			return err
		}
		if !keepGoing {
			return nil
		}
	}
	return nil
}
// Size reports the number of Attributes first and the maximum allocated
// row second.
func (inst *DenseInstances) Size() (int, int) {
	attrCount := len(inst.AllAttributes())
	return attrCount, inst.maxRow
}
// swapRows exchanges the contents of rows i and j, one Attribute at a time.
func (inst *DenseInstances) swapRows(i, j int) {
	for _, spec := range ResolveAllAttributes(inst) {
		rowI := inst.Get(spec, i)
		rowJ := inst.Get(spec, j)

		// Take a private copy of row j's value before row j is overwritten
		savedJ := make([]byte, len(rowJ))
		copy(savedJ, rowJ)

		inst.Set(spec, j, rowI)
		inst.Set(spec, i, savedJ)
	}
}
// String builds a human-readable summary of this dataset: the row and
// Attribute counts, the Attribute list (class Attributes are starred) and
// up to the first 30 rows of data.
func (inst *DenseInstances) String() string {
	var buffer bytes.Buffer

	// Get all Attribute information
	specs := ResolveAllAttributes(inst)

	// Header
	cols, rows := inst.Size()
	buffer.WriteString("Instances with ")
	fmt.Fprintf(&buffer, "%d row(s) ", rows)
	fmt.Fprintf(&buffer, "%d attribute(s)\n", cols)
	buffer.WriteString("Attributes: \n")
	for _, spec := range specs {
		marker := "\t"
		if inst.classAttrs[spec] {
			marker = "*\t"
		}
		fmt.Fprintf(&buffer, "%s%s\n", marker, spec.attr)
	}

	// Data, capped at 30 rows
	buffer.WriteString("\nData:\n")
	shown := rows
	if shown > 30 {
		shown = 30
	}
	for row := 0; row < shown; row++ {
		buffer.WriteString("\t")
		for _, spec := range specs {
			fmt.Fprintf(&buffer, "%s ", spec.attr.GetStringFromSysVal(inst.Get(spec, row)))
		}
		buffer.WriteString("\n")
	}

	// Footer
	if hidden := rows - shown; hidden != 0 {
		fmt.Fprintf(&buffer, "\t...\n%d row(s) undisplayed", hidden)
	} else {
		buffer.WriteString("All rows displayed")
	}
	return buffer.String()
}
================================================
FILE: base/dense_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestHighDimensionalInstancesLoad checks that the MNIST training CSV
// (with a header row) can be parsed into instances without an error.
func TestHighDimensionalInstancesLoad(t *testing.T) {
Convey("Given a high-dimensional dataset...", t, func() {
_, err := ParseCSVToInstances("../examples/datasets/mnist_train.csv", true)
So(err, ShouldEqual, nil)
})
}
// TestHighDimensionalInstancesLoad2 checks that the MNIST training CSV can
// be parsed without an error when the class Attribute is overridden and
// assigned to a named AttributeGroup of its own.
func TestHighDimensionalInstancesLoad2(t *testing.T) {
Convey("Given a high-dimensional dataset...", t, func() {
// Create the class Attribute: a CategoricalAttribute called "Number",
// registered as the override for index 0
classAttrs := make(map[int]Attribute)
classAttrs[0] = NewCategoricalAttribute()
classAttrs[0].SetName("Number")
// Setup the class Attribute to be in its own group
classAttrGroups := make(map[string]string)
classAttrGroups["Number"] = "ClassGroup"
// The rest can go in a default group (no explicit mappings given)
attrGroups := make(map[string]string)
_, err := ParseCSVToInstancesWithAttributeGroups(
"../examples/datasets/mnist_train.csv",
attrGroups,
classAttrGroups,
classAttrs,
true,
)
So(err, ShouldEqual, nil)
})
}
================================================
FILE: base/domain.go
================================================
// Package base provides base interfaces for GoLearn objects to implement.
// It also provides a raw base for those objects.
package base
import (
"bytes"
"encoding/gob"
"io/ioutil"
"gonum.org/v1/gonum/mat"
)
// An Estimator is an object that can ingest some data and train on it.
type Estimator interface {
// Fit trains the estimator.
Fit()
}
// A Predictor is an object that provides predictions.
type Predictor interface {
// Predict produces predictions.
Predict()
}
// A Model is a supervised learning object that is
// capable of scoring its accuracy against a test set.
type Model interface {
// Score evaluates the model.
Score()
}
// BaseEstimator is a raw base for estimators: it only carries a dense
// matrix of data.
type BaseEstimator struct {
// Data is the dense matrix held by this estimator.
Data *mat.Dense
}
// SaveEstimatorToGob gob-encodes the estimator in memory and then writes the
// encoded bytes to the provided filepath with permissions 0644.
// See http://golang.org/pkg/encoding/gob for further details.
//
// IMPORTANT: panics if either the encoding or the file write fails.
func SaveEstimatorToGob(path string, e *Estimator) {
	var encoded bytes.Buffer
	if err := gob.NewEncoder(&encoded).Encode(e); err != nil {
		panic(err)
	}
	if err := ioutil.WriteFile(path, encoded.Bytes(), 0644); err != nil {
		panic(err)
	}
}
================================================
FILE: base/error.go
================================================
package base
import (
"fmt"
"os"
"runtime/debug"
"strings"
)
// GoLearnError wraps another error together with an optional description
// and a stack trace captured when the error was created.
type GoLearnError struct {
	// WrappedError is the underlying error (may be nil).
	WrappedError error
	// CurrentStack is the filtered stack trace set by attachFormattedStack.
	CurrentStack string
	// Description is optional extra context; empty when not provided.
	Description string
}

// wrapLinesWithTabPrefix prefixes every "\n"-separated line of s with a tab.
// The empty string counts as one (empty) line, so the result is "\t".
func wrapLinesWithTabPrefix(s string) string {
	lines := strings.Split(s, "\n")
	for i, line := range lines {
		lines[i] = fmt.Sprintf("\t%s", line)
	}
	return strings.Join(lines, "\n")
}

// Error renders the error as text.
//
// When the GOLEARN_FULL_DEBUG environment variable is "true", the wrapped
// error and the captured stack are both printed, each line tab-indented.
// Otherwise the wrapped error is printed on its own if no Description was
// given, or as "GoLearnError( <description>: <wrapped> )" if one was.
//
// A nil WrappedError is printed as "<nil>" rather than causing a panic.
func (g *GoLearnError) Error() string {
	// fmt.Sprint yields the error's message, or "<nil>" for a nil error
	wrapped := fmt.Sprint(g.WrappedError)
	if os.Getenv("GOLEARN_FULL_DEBUG") == "true" {
		return fmt.Sprintf("GoLearnError( %s\n\tCaptured at: %s\n)",
			wrapLinesWithTabPrefix(wrapped), wrapLinesWithTabPrefix(g.CurrentStack))
	}
	if g.Description == "" {
		// Nothing to add: surface the wrapped error's own message
		return wrapped
	}
	return fmt.Sprintf("GoLearnError( %s: %s )", g.Description, wrapped)
}
// attachFormattedStack captures the current stack and stores a filtered
// version of it in g.CurrentStack.
//
// Only lines mentioning "golearn" are kept, and of those, lines belonging to
// the error helpers themselves are dropped. If nothing remains, the stack is
// recorded as "<invalid>".
func (g *GoLearnError) attachFormattedStack() {
	// Markers identifying lines produced by the error machinery itself
	ignored := []string{
		"golearn/base/error.go",
		"base.WrapError",
		"base.DescribeError",
		"golearn/base.(*GoLearnError).attachFormattedStack",
	}

	kept := make([]string, 0)
frames:
	for _, frame := range strings.Split(string(debug.Stack()), "\n") {
		if !strings.Contains(frame, "golearn") {
			continue
		}
		for _, marker := range ignored {
			if strings.Contains(frame, marker) {
				continue frames
			}
		}
		kept = append(kept, frame)
	}

	g.CurrentStack = "<invalid>"
	if len(kept) > 0 {
		g.CurrentStack = strings.Join(kept, "\t\t\n")
	}
}
// DescribeError wraps err in a GoLearnError carrying the given description
// and the stack captured at the time of the call.
func DescribeError(description string, err error) error {
	described := &GoLearnError{WrappedError: err, Description: description}
	described.attachFormattedStack()
	return described
}
// WrapError wraps err in a GoLearnError (without a description) carrying the
// stack captured at the time of the call.
func WrapError(err error) error {
	wrapped := &GoLearnError{WrappedError: err}
	wrapped.attachFormattedStack()
	return wrapped
}
// FormatError is DescribeError with a description built from a format
// string and its arguments, as with fmt.Sprintf.
func FormatError(err error, format string, args ...interface{}) error {
	return DescribeError(fmt.Sprintf(format, args...), err)
}
================================================
FILE: base/error_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestId3, despite its name, exercises the GoLearnError helpers: stack
// capture, Error(), DescribeError, WrapError and wrapLinesWithTabPrefix.
func TestId3(t *testing.T) {
Convey("Doing a error test", t, func() {
// A zero-valued GoLearnError must still format without panicking
var _gerr GoLearnError
gerr := &_gerr
gerr.attachFormattedStack()
s := gerr.Error()
So(s, ShouldNotBeNil)
// Wrapping a nil error still yields a non-nil error value
err := DescribeError("test", nil)
So(err, ShouldNotBeNil)
err = WrapError(nil)
So(err, ShouldNotBeNil)
// The trailing newline produces a final, empty, tab-prefixed line
s = wrapLinesWithTabPrefix("123\ntest\n")
So(s, ShouldEqual, "\t123\n\ttest\n\t")
})
}
================================================
FILE: base/filewrapper.go
================================================
package base
import (
"os"
)
// ParseCSVGetRows opens the file at filepath and returns the number of rows
// counted by ParseCSVGetRowsFromReader. An error is returned if the file
// can't be opened.
func ParseCSVGetRows(filepath string) (int, error) {
	file, openErr := os.Open(filepath)
	if openErr != nil {
		return 0, openErr
	}
	defer file.Close()

	return ParseCSVGetRowsFromReader(file)
}
// ParseCSVEstimateFilePrecision determines the maximum number of digits
// occurring anywhere after the decimal point within the file, by handing
// the opened file to ParseCSVEstimateFilePrecisionFromReader. An error is
// returned if the file can't be opened.
func ParseCSVEstimateFilePrecision(filepath string) (int, error) {
	file, openErr := os.Open(filepath)
	if openErr != nil {
		return 0, openErr
	}
	defer file.Close()

	return ParseCSVEstimateFilePrecisionFromReader(file)
}
// ParseCSVGetAttributes returns an ordered slice of appropriately typed
// and named Attributes, as produced by ParseCSVGetAttributesFromReader.
//
// IMPORTANT: panics if the file can't be opened.
func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
	file, openErr := os.Open(filepath)
	if openErr != nil {
		panic(openErr)
	}
	defer file.Close()

	return ParseCSVGetAttributesFromReader(file, hasHeaders)
}
// ParseCSVSniffAttributeNames returns a slice containing the top row
// of a given CSV file, or placeholders if hasHeaders is false, as produced
// by ParseCSVSniffAttributeNamesFromReader.
//
// IMPORTANT: panics if the file can't be opened.
func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
	file, openErr := os.Open(filepath)
	if openErr != nil {
		panic(openErr)
	}
	defer file.Close()

	return ParseCSVSniffAttributeNamesFromReader(file, hasHeaders)
}
// ParseCSVSniffAttributeTypes returns a slice of appropriately-typed
// Attributes, as produced by ParseCSVSniffAttributeTypesFromReader.
//
// The type of a given attribute is determined by looking at the first data
// row of the CSV.
//
// IMPORTANT: panics if the file can't be opened.
func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute {
	file, openErr := os.Open(filepath)
	if openErr != nil {
		panic(openErr)
	}
	defer file.Close()

	return ParseCSVSniffAttributeTypesFromReader(file, hasHeaders)
}
// ParseCSVToInstances opens the CSV file given by filepath and returns the
// Instances read from it by ParseCSVToInstancesFromReader. An error is
// returned if the file can't be opened.
func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInstances, err error) {
	src, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}
	defer src.Close()

	return ParseCSVToInstancesFromReader(src, hasHeaders)
}
// ParseCSVToTemplatedInstances opens the CSV file given by filepath and
// returns the Instances read from it, using another already read
// DenseInstances as a template (see ParseCSVToTemplatedInstancesFromReader).
// An error is returned if the file can't be opened.
func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error) {
	src, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}
	defer src.Close()

	return ParseCSVToTemplatedInstancesFromReader(src, hasHeaders, template)
}
// ParseCSVToInstancesWithAttributeGroups opens the CSV file given by
// filepath and returns the DenseInstances read from it, grouping any
// Attributes named in attrGroups and any class Attributes named in
// classAttrGroups. All arguments are passed through to
// ParseCSVToInstancesWithAttributeGroupsFromReader. An error is returned if
// the file can't be opened.
func ParseCSVToInstancesWithAttributeGroups(filepath string, attrGroups, classAttrGroups map[string]string, attrOverrides map[int]Attribute, hasHeaders bool) (instances *DenseInstances, err error) {
	src, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}
	defer src.Close()

	return ParseCSVToInstancesWithAttributeGroupsFromReader(src, attrGroups, classAttrGroups, attrOverrides, hasHeaders)
}
================================================
FILE: base/filtered.go
================================================
package base
import (
"bytes"
"fmt"
)
// TODO: Maybe include a TransformedAttribute struct
// so we can map from ClassAttribute to ClassAttribute.
// LazilyFilteredInstances map a Filter over an underlying
// FixedDataGrid and are a memory-efficient way of applying them:
// values are transformed when they are read.
type LazilyFilteredInstances struct {
// filter transforms the stored byte sequences on access
filter Filter
// src is the underlying, unmodified data
src FixedDataGrid
// attrs pairs each filtered source Attribute (Old) with its replacement (New)
attrs []FilteredAttribute
// classAttrs flags class Attributes; filtered ones are keyed by their New Attribute
classAttrs map[Attribute]bool
// unfilteredMap is true for source Attributes the filter leaves untouched
unfilteredMap map[Attribute]bool
}
// NewLazilyFilteredInstances returns a new FixedDataGrid which applies the
// given Filter to the Attributes it covers. Unfiltered Attributes are passed
// through without modification, and the class Attributes of src are carried
// over.
func NewLazilyFilteredInstances(src FixedDataGrid, f Filter) *LazilyFilteredInstances {
	// Get the Attributes after filtering
	filtered := f.GetAttributesAfterFiltering()

	// Flag every source Attribute as unfiltered, then clear the flag for
	// each one the filter replaces
	passThrough := make(map[Attribute]bool)
	for _, attr := range src.AllAttributes() {
		passThrough[attr] = true
	}
	for _, pair := range filtered {
		passThrough[pair.Old] = false
	}

	ret := &LazilyFilteredInstances{
		filter:        f,
		src:           src,
		attrs:         filtered,
		classAttrs:    make(map[Attribute]bool),
		unfilteredMap: passThrough,
	}

	// Transfer class Attributes
	for _, cls := range src.AllClassAttributes() {
		ret.AddClassAttribute(cls)
	}
	return ret
}
// GetAttribute returns an AttributeSpec for a given Attribute.
//
// Unfiltered Attributes are resolved by the source grid. Attributes created
// by the filter get a spec with group index -1 and their index in the
// filtered Attribute list as position. An error is returned if the target
// matches neither.
func (l *LazilyFilteredInstances) GetAttribute(target Attribute) (AttributeSpec, error) {
	if l.unfilteredMap[target] {
		return l.src.GetAttribute(target)
	}
	for idx, pair := range l.attrs {
		if !pair.New.Equals(target) {
			continue
		}
		return AttributeSpec{pond: -1, position: idx, attr: target}, nil
	}
	return AttributeSpec{pond: -1}, fmt.Errorf("Couldn't resolve %s", target)
}
// AllAttributes walks the source grid's Attributes in order and returns the
// unfiltered ones unchanged, with every filtered one replaced by the new
// Attribute(s) the filter derives from it.
func (l *LazilyFilteredInstances) AllAttributes() []Attribute {
	out := make([]Attribute, 0)
	for _, srcAttr := range l.src.AllAttributes() {
		if l.unfilteredMap[srcAttr] {
			out = append(out, srcAttr)
			continue
		}
		// Filtered: substitute each replacement derived from this Attribute
		for _, pair := range l.attrs {
			if srcAttr.Equals(pair.Old) {
				out = append(out, pair.New)
			}
		}
	}
	return out
}
// AddClassAttribute adds a given Attribute (either before or after
// filtering) to the set of defined class Attributes. For filtered
// Attributes, every matching replacement is flagged. An error is returned
// if the Attribute matches nothing.
func (l *LazilyFilteredInstances) AddClassAttribute(cls Attribute) error {
	if l.unfilteredMap[cls] {
		l.classAttrs[cls] = true
		return nil
	}

	resolved := false
	for _, pair := range l.attrs {
		if !pair.Old.Equals(cls) && !pair.New.Equals(cls) {
			continue
		}
		l.classAttrs[pair.New] = true
		resolved = true
	}
	if resolved {
		return nil
	}
	return fmt.Errorf("Attribute %s could not be resolved", cls)
}
// RemoveClassAttribute removes a given Attribute (either before or
// after filtering) from the set of defined class Attributes. For filtered
// Attributes, only the first matching replacement is cleared. An error is
// returned if the Attribute matches nothing.
func (l *LazilyFilteredInstances) RemoveClassAttribute(cls Attribute) error {
	if l.unfilteredMap[cls] {
		l.classAttrs[cls] = false
		return nil
	}
	for _, pair := range l.attrs {
		if !pair.Old.Equals(cls) && !pair.New.Equals(cls) {
			continue
		}
		l.classAttrs[pair.New] = false
		return nil
	}
	return fmt.Errorf("Attribute %s could not be resolved", cls)
}
// AllClassAttributes returns all Attributes currently flagged as being
// class Attributes, as a (never nil) slice.
//
// If applicable, the Attributes returned are those after modification
// by the Filter.
func (l *LazilyFilteredInstances) AllClassAttributes() []Attribute {
	classes := make([]Attribute, 0)
	for attr, isClass := range l.classAttrs {
		if !isClass {
			continue
		}
		classes = append(classes, attr)
	}
	return classes
}
// transformNewToOldAttribute maps an AttributeSpec of this grid onto the
// AttributeSpec of the corresponding Attribute in the source grid.
// Specs of unfiltered Attributes are returned unchanged. An error is
// returned if no filtered Attribute matches, or if the source grid can't
// resolve the matching original.
func (l *LazilyFilteredInstances) transformNewToOldAttribute(as AttributeSpec) (AttributeSpec, error) {
	if l.unfilteredMap[as.GetAttribute()] {
		return as, nil
	}
	for _, pair := range l.attrs {
		if !pair.Old.Equals(as.attr) && !pair.New.Equals(as.attr) {
			continue
		}
		srcSpec, err := l.src.GetAttribute(pair.Old)
		if err != nil {
			return AttributeSpec{}, fmt.Errorf("Internal error in Attribute resolution: '%s'", err)
		}
		return srcSpec, nil
	}
	return AttributeSpec{}, fmt.Errorf("No matching Attribute")
}
// Get returns the byte slice stored at a given AttributeSpec and row,
// passed through the filter unless the Attribute is unfiltered.
//
// IMPORTANT: panics if the AttributeSpec can't be mapped onto the source.
func (l *LazilyFilteredInstances) Get(as AttributeSpec, row int) []byte {
	srcSpec, err := l.transformNewToOldAttribute(as)
	if err != nil {
		panic(fmt.Sprintf("Attribute %s could not be resolved. (Error: %s)", as.String(), err.Error()))
	}

	raw := l.src.Get(srcSpec, row)
	if l.unfilteredMap[as.attr] {
		return raw
	}
	return l.filter.Transform(srcSpec.attr, as.attr, raw)
}
// MapOverRows maps mapFunc over the rows of the source FixedDataGrid,
// passing every requested value through the filter first.
//
// An error is returned if any entry of asv can't be mapped onto the source;
// otherwise the result of the source's MapOverRows is returned. The same
// row buffer is reused for every call of mapFunc.
func (l *LazilyFilteredInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]byte, int) (bool, error)) error {
	// Translate each requested spec into its counterpart in the source
	oldAsv := make([]AttributeSpec, len(asv))
	for i := range asv {
		spec := asv[i]
		srcSpec, err := l.transformNewToOldAttribute(spec)
		if err != nil {
			return fmt.Errorf("Couldn't fetch old Attribute: '%s'", spec.String())
		}
		oldAsv[i] = srcSpec
	}

	// Transform every source row before handing it to mapFunc
	transformed := make([][]byte, len(asv))
	visit := func(srcRow [][]byte, rowNo int) (bool, error) {
		for col := range srcRow {
			transformed[col] = l.filter.Transform(oldAsv[col].attr, asv[col].attr, srcRow[col])
		}
		return mapFunc(transformed, rowNo)
	}
	return l.src.MapOverRows(oldAsv, visit)
}
// RowString returns the space-separated string representation of a given
// row after filtering.
func (l *LazilyFilteredInstances) RowString(row int) string {
	var buffer bytes.Buffer
	for idx, spec := range ResolveAllAttributes(l) {
		// Every value but the first is preceded by a space
		sep := " "
		if idx == 0 {
			sep = ""
		}
		filtered := l.Get(spec, row)
		fmt.Fprintf(&buffer, "%s%s", sep, spec.attr.GetStringFromSysVal(filtered))
	}
	return buffer.String()
}
// Size reports the Attribute and row counts of the underlying
// FixedDataGrid, exactly as the source returns them.
func (l *LazilyFilteredInstances) Size() (int, int) {
	attrs, rows := l.src.Size()
	return attrs, rows
}
// String returns a human-readable summary of this FixedDataGrid after
// filtering: the filter, the Attribute list (class Attributes are starred)
// and up to the first 5 rows of data.
func (l *LazilyFilteredInstances) String() string {
	var buffer bytes.Buffer

	// Decide on rows to print: at most 5
	_, rows := l.Size()
	shown := rows
	if shown > 5 {
		shown = 5
	}

	// Get all Attribute information
	specs := ResolveAllAttributes(l)

	// Header
	buffer.WriteString("Lazily filtered instances using ")
	fmt.Fprintf(&buffer, "%s\n", l.filter)
	buffer.WriteString("Attributes: \n")
	for _, spec := range specs {
		marker := "\t"
		if l.classAttrs[spec.attr] {
			marker = "*\t"
		}
		fmt.Fprintf(&buffer, "%s%s\n", marker, spec.attr)
	}

	// Data
	buffer.WriteString("\nData:\n")
	for row := 0; row < shown; row++ {
		buffer.WriteString("\t")
		for _, spec := range specs {
			fmt.Fprintf(&buffer, "%s ", spec.attr.GetStringFromSysVal(l.Get(spec, row)))
		}
		buffer.WriteString("\n")
	}
	return buffer.String()
}
================================================
FILE: base/filters.go
================================================
package base
// FilteredAttribute represents a mapping between an Attribute
// generated by a filter and the original Attribute it replaces.
type FilteredAttribute struct {
// Old is the original Attribute, before filtering
Old Attribute
// New is the Attribute generated by the filter
New Attribute
}
// Filters transform the byte sequences stored in DataGrid
// implementations.
type Filter interface {
// Adds an Attribute to the filter
AddAttribute(Attribute) error
// Allows mapping old to new Attributes
GetAttributesAfterFiltering() []FilteredAttribute
// Gets a string for printing
String() string
// Accepts an old Attribute, the new one and the stored byte sequence,
// and returns the transformed byte sequence
Transform(Attribute, Attribute, []byte) []byte
// Builds the filter
Train() error
}
================================================
FILE: base/fixed.go
================================================
package base
import (
"bytes"
"fmt"
)
// FixedAttributeGroups contain a particular number of rows of
// a particular number of Attributes, all of a given type.
// Values are stored row by row in one byte slice, each value
// occupying the same number of bytes.
type FixedAttributeGroup struct {
// parent is the DataGrid this group belongs to
parent DataGrid
// attributes lists the Attributes (columns) stored in this group
attributes []Attribute
// size is the number of bytes occupied by a single value
size int
// alloc is the backing storage for all rows
alloc []byte
// maxRow is one more than the highest row written via set()
maxRow int
}
// String gets a human-readable summary: the constant type name
// "FixedAttributeGroup".
func (f *FixedAttributeGroup) String() string {
return "FixedAttributeGroup"
}
// RowSizeInBytes returns the size of each row in bytes: the per-value size
// multiplied by the number of Attributes in the group.
func (f *FixedAttributeGroup) RowSizeInBytes() int {
	columns := len(f.attributes)
	return columns * f.size
}
// Attributes returns the Attributes in this FixedAttributeGroup. The result
// is a fresh slice, so changing it doesn't affect the group.
func (f *FixedAttributeGroup) Attributes() []Attribute {
	snapshot := make([]Attribute, len(f.attributes))
	copy(snapshot, f.attributes)
	return snapshot
}
// AddAttribute adds an attribute to the end of this FixedAttributeGroup.
// It performs no checks and always returns nil.
func (f *FixedAttributeGroup) AddAttribute(a Attribute) error {
f.attributes = append(f.attributes, a)
return nil
}
// setStorage replaces this FixedAttributeGroup's backing storage with the
// given byte slice (the slice is referenced, not copied).
func (f *FixedAttributeGroup) setStorage(a []byte) {
f.alloc = a
}
// Storage returns the byte slice backing this group, which can
// be used to access its memory directly (it is not a copy).
func (f *FixedAttributeGroup) Storage() []byte {
return f.alloc
}
// offset computes the index into the backing storage at which the value
// for the given column and row begins.
func (f *FixedAttributeGroup) offset(col, row int) int {
	rowStart := row * f.RowSizeInBytes()
	return rowStart + col*f.size
}
// set copies val into the storage slot for the given column and row.
//
// It panics if val isn't exactly one value wide, or if fewer bytes than
// that could be copied into the storage.
func (f *FixedAttributeGroup) set(col int, row int, val []byte) {
	// Double-check the length
	if got := len(val); got != f.size {
		panic(fmt.Sprintf("Tried to call set() with %d bytes, should be %d", got, f.size))
	}

	// Copy the value into its slot
	start := f.offset(col, row)
	if n := copy(f.alloc[start:], val); n != f.size {
		panic(fmt.Sprintf("set() terminated by only copying %d bytes, should be %d", n, f.size))
	}

	// Keep maxRow at one past the highest row written so far
	if next := row + 1; next > f.maxRow {
		f.maxRow = next
	}
}
// get returns the bytes stored for the given column and row. The result is
// a window onto the backing storage, not a copy.
func (f *FixedAttributeGroup) get(col int, row int) []byte {
	start := f.offset(col, row)
	end := start + f.size
	return f.alloc[start:end]
}
// appendToRowBuf writes the string form of every value on the given row to
// buffer, separating consecutive values with a single space.
func (f *FixedAttributeGroup) appendToRowBuf(row int, buffer *bytes.Buffer) {
	last := len(f.attributes) - 1
	for i, attr := range f.attributes {
		// No separator after the final value
		sep := " "
		if i == last {
			sep = ""
		}
		fmt.Fprintf(buffer, "%s%s", attr.GetStringFromSysVal(f.get(i, row)), sep)
	}
}
// resize grows the backing storage by add bytes: a new, larger slice is
// allocated and the existing contents are copied to its start.
func (f *FixedAttributeGroup) resize(add int) {
	grown := make([]byte, len(f.alloc)+add)
	copy(grown, f.alloc)
	f.alloc = grown
}
================================================
FILE: base/float.go
================================================
package base
import (
"encoding/json"
"fmt"
"strconv"
)
// FloatAttribute is an implementation which stores floating point
// representations of numbers.
type FloatAttribute struct {
	// Name is the human-readable name of this Attribute.
	Name string
	// Precision is the number of decimal places used when formatting values.
	Precision int
}

// MarshalJSON returns a JSON representation of this Attribute
// for serialisation: an object with "type", "name" and an "attr" object
// holding the precision.
func (f *FloatAttribute) MarshalJSON() ([]byte, error) {
	return json.Marshal(map[string]interface{}{
		"type": "float",
		"name": f.Name,
		"attr": map[string]interface{}{
			"precision": f.Precision,
		},
	})
}

// UnmarshalJSON reads a JSON representation of this Attribute.
//
// The precision is taken from a top-level "precision" key or, failing that,
// from the "attr" object written by MarshalJSON. A top-level string "name",
// if present, is restored as well.
//
// An error is returned if the data isn't valid JSON, if no precision is
// present, or if the precision isn't a number.
func (f *FloatAttribute) UnmarshalJSON(data []byte) error {
	var d map[string]interface{}
	if err := json.Unmarshal(data, &d); err != nil {
		return err
	}

	raw, ok := d["precision"]
	if !ok {
		// Fall back to the nested layout produced by MarshalJSON
		if attr, isObject := d["attr"].(map[string]interface{}); isObject {
			raw, ok = attr["precision"]
		}
	}
	if !ok {
		return fmt.Errorf("Precision must be specified")
	}

	// encoding/json decodes every JSON number into a float64; anything else
	// (string, null, object...) is rejected instead of panicking
	precision, isNumber := raw.(float64)
	if !isNumber {
		return fmt.Errorf("Precision must be a number, got %T", raw)
	}
	f.Precision = int(precision)

	if name, isString := d["name"].(string); isString {
		f.Name = name
	}
	return nil
}
// NewFloatAttribute returns a new FloatAttribute with the given name and a
// default precision of 2 decimal places
func NewFloatAttribute(name string) *FloatAttribute {
	return &FloatAttribute{
		Name:      name,
		Precision: 2,
	}
}
// Compatible checks whether this FloatAttribute can be ponded with another
// Attribute, which is the case exactly when the other is a *FloatAttribute.
func (Attr *FloatAttribute) Compatible(other Attribute) bool {
	switch other.(type) {
	case *FloatAttribute:
		return true
	default:
		return false
	}
}
// Equals tests a FloatAttribute for equality with another Attribute.
//
// Two Attributes are equal when the other is also a FloatAttribute and both
// have the same name. Precision is not compared.
func (Attr *FloatAttribute) Equals(other Attribute) bool {
	if _, isFloat := other.(*FloatAttribute); !isFloat {
		// Not the same type, so can't be equal
		return false
	}
	return Attr.GetName() == other.GetName()
}
// GetName returns this FloatAttribute's human-readable name
// (the Name field).
func (Attr *FloatAttribute) GetName() string {
return Attr.Name
}
// SetName sets this FloatAttribute's human-readable name
// (the Name field).
func (Attr *FloatAttribute) SetName(name string) {
Attr.Name = name
}
// GetType returns Float64Type for every FloatAttribute.
func (Attr *FloatAttribute) GetType() int {
return Float64Type
}
// String returns a human-readable summary of this Attribute: its name
// wrapped in "FloatAttribute(...)".
// e.g. "FloatAttribute(Sepal Width)"
func (Attr *FloatAttribute) String() string {
	return "FloatAttribute(" + Attr.Name + ")"
}
// CheckSysValFromString confirms whether a given rawVal can be converted
// into a valid system representation. rawVal is parsed as a 64-bit float
// and packed into bytes; if parsing fails, the returned value is nil and
// the parse error is returned.
func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) ([]byte, error) {
	parsed, err := strconv.ParseFloat(rawVal, 64)
	if err != nil {
		return nil, err
	}
	return PackFloatToBytes(parsed), nil
}
// GetSysValFromString parses the given rawVal string as a float64 and
// returns its system (byte) representation.
//
// IMPORTANT: This function panic()s if rawVal is not a valid float.
// Use CheckSysValFromString to confirm.
func (Attr *FloatAttribute) GetSysValFromString(rawVal string) []byte {
	packed, err := Attr.CheckSysValFromString(rawVal)
	if err != nil {
		panic(err)
	}
	return packed
}
// GetFloatFromSysVal converts a given system value to a float by unpacking
// its bytes.
func (Attr *FloatAttribute) GetFloatFromSysVal(rawVal []byte) float64 {
	value := UnpackBytesToFloat(rawVal)
	return value
}
// GetStringFromSysVal converts a given system value to a string, formatted
// with as many decimal places as this Attribute's Precision.
func (Attr *FloatAttribute) GetStringFromSysVal(rawVal []byte) string {
	value := UnpackBytesToFloat(rawVal)
	// Build a verb such as "%.2f" from the configured precision
	layout := "%." + strconv.Itoa(Attr.Precision) + "f"
	return fmt.Sprintf(layout, value)
}
================================================
FILE: base/group.go
================================================
package base
import (
"bytes"
)
// AttributeGroups store related sequences of system values
// in memory for the DenseInstances structure.
type AttributeGroup interface {
// Used for printing: appends the values of the given row to the buffer
appendToRowBuf(row int, buffer *bytes.Buffer)
// Adds a new Attribute
AddAttribute(Attribute) error
// Returns all Attributes
Attributes() []Attribute
// Gets the byte slice at a given column, row offset
get(int, int) []byte
// Stores the byte slice at a given column, row offset
set(int, int, []byte)
// Sets the reference to underlying memory
setStorage([]byte)
// Gets the size of each row in bytes (rounded up)
RowSizeInBytes() int
// Adds some storage (the given number of bytes) to this group
resize(int)
// Gets a reference to underlying memory
Storage() []byte
// Returns a human-readable summary
String() string
}
================================================
FILE: base/lazy_sort_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestLazySortDesc checks that LazySort in Descending order turns the
// unsorted iris dataset into one which is sorted descending and equal to a
// pre-sorted reference file.
func TestLazySortDesc(t *testing.T) {
Convey("Given data that's not already sorted descending", t, func() {
unsorted, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
So(err, ShouldBeNil)
as1 := ResolveAllAttributes(unsorted)
So(isSortedDesc(unsorted, as1[0]), ShouldBeFalse)
Convey("Given reference data that's alredy sorted descending", func() {
sortedDescending, err := ParseCSVToInstances("../examples/datasets/iris_sorted_desc.csv", true)
So(err, ShouldBeNil)
as2 := ResolveAllAttributes(sortedDescending)
So(isSortedDesc(sortedDescending, as2[0]), ShouldBeTrue)
Convey("LazySorting Descending", func() {
// Sort on every Attribute except the last one
result, err := LazySort(unsorted, Descending, as1[0:len(as1)-1])
So(err, ShouldBeNil)
Convey("Result should be sorted descending", func() {
So(isSortedDesc(result, as1[0]), ShouldBeTrue)
})
Convey("Result should match the reference", func() {
So(InstancesAreEqual(sortedDescending, result), ShouldBeTrue)
})
})
})
})
}
// TestLazySortAsc checks that LazySort in Ascending order turns the
// unsorted iris dataset into one which is sorted ascending, equal to a
// pre-sorted reference file, and starting with a known first row.
func TestLazySortAsc(t *testing.T) {
Convey("Given data that's not already sorted ascending", t, func() {
unsorted, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
So(err, ShouldBeNil)
as1 := ResolveAllAttributes(unsorted)
So(isSortedAsc(unsorted, as1[0]), ShouldBeFalse)
Convey("Given reference data that's alredy sorted ascending", func() {
sortedAscending, err := ParseCSVToInstances("../examples/datasets/iris_sorted_asc.csv", true)
So(err, ShouldBeNil)
as2 := ResolveAllAttributes(sortedAscending)
So(isSortedAsc(sortedAscending, as2[0]), ShouldBeTrue)
Convey("LazySorting Ascending", func() {
// Sort on every Attribute except the last one
result, err := LazySort(unsorted, Ascending, as1[0:len(as1)-1])
So(err, ShouldBeNil)
// NOTE: the label below says "descending", but the assertion checks
// ascending order
Convey("Result should be sorted descending", func() {
So(isSortedAsc(result, as1[0]), ShouldBeTrue)
})
Convey("Result should match the reference", func() {
So(InstancesAreEqual(sortedAscending, result), ShouldBeTrue)
})
Convey("First element of Result should equal known value", func() {
So(result.RowString(0), ShouldEqual, "4.3 3.0 1.1 0.1 Iris-setosa")
})
})
})
})
}
================================================
FILE: base/logger.go
================================================
package base
import (
"io"
"log"
"os"
)
// Logger is the default logger for the entire golearn package. It writes
// to stdout and has no prefix and no flags.
var Logger *log.Logger = log.New(os.Stdout, "", 0)
// SetLogger replaces the package-wide Logger with the one supplied, so that
// all subsequent golearn log output goes through it.
func SetLogger(logger *log.Logger) {
	Logger = logger
}
// SetLoggerOut creates a new base logger for the entire golearn
// package using the given out instead of the default, os.Stdout.
// The other log options are set to the default, i.e. no prefix and no
// flags.
func SetLoggerOut(out io.Writer) {
Logger = log.New(out, "", 0)
}
// Silent turns off logging throughout the golearn package by setting
// the logger to write to dev/null.
func Silent() {
if out, err := os.Open(os.DevNull); err != nil {
panic(err)
} else {
Logger = log.New(out, "", 0)
}
}
================================================
FILE: base/mat.go
================================================
package base
import (
"bytes"
"fmt"
"gonum.org/v1/gonum/mat"
)
// Mat64Instances exposes a gonum *mat.Dense through the dataset accessor
// methods defined on this type (GetAttribute, Get, MapOverRows, Size, ...).
type Mat64Instances struct {
// attributes holds one Attribute per column, in column order.
attributes []Attribute
// classAttrs maps a column position to whether it is a class Attribute.
classAttrs map[int]bool
// Data is the matrix the values are read from.
Data *mat.Dense
// rows is the row count reported by Size and visited by MapOverRows.
rows int
}
// InstancesFromMat64 wraps an existing *mat.Dense in a Mat64Instances.
//
// One FloatAttribute is created per column and named after its zero-based
// column index ("0", "1", ...). No class Attributes are set. The matrix is
// referenced, not copied, and rows is stored as the reported row count.
func InstancesFromMat64(rows, cols int, data *mat.Dense) *Mat64Instances {
	inst := &Mat64Instances{
		classAttrs: make(map[int]bool),
		Data:       data,
		rows:       rows,
	}
	for col := 0; col < cols; col++ {
		name := fmt.Sprintf("%d", col)
		inst.attributes = append(inst.attributes, NewFloatAttribute(name))
	}
	return inst
}
// GetAttribute looks up the AttributeSpec for the first stored Attribute
// that Equals a. An error is returned when no stored Attribute matches.
func (m *Mat64Instances) GetAttribute(a Attribute) (AttributeSpec, error) {
	for pos := range m.attributes {
		candidate := m.attributes[pos]
		if !candidate.Equals(a) {
			continue
		}
		return AttributeSpec{0, pos, candidate}, nil
	}
	return AttributeSpec{}, fmt.Errorf("Couldn't find a matching attribute")
}
// AllAttributes returns a fresh slice holding every defined Attribute in
// column order. Callers may modify the returned slice freely.
func (m *Mat64Instances) AllAttributes() []Attribute {
	out := make([]Attribute, len(m.attributes))
	copy(out, m.attributes)
	return out
}
// AddClassAttribute marks a as a class Attribute. It returns the lookup
// error from GetAttribute when a is not part of these instances.
func (m *Mat64Instances) AddClassAttribute(a Attribute) error {
	spec, err := m.GetAttribute(a)
	if err == nil {
		m.classAttrs[spec.position] = true
	}
	return err
}
// RemoveClassAttribute removes a from the class set by recording its
// position as false. It returns the lookup error from GetAttribute when a is
// not part of these instances.
func (m *Mat64Instances) RemoveClassAttribute(a Attribute) error {
	spec, err := m.GetAttribute(a)
	if err == nil {
		m.classAttrs[spec.position] = false
	}
	return err
}
// AllClassAttributes returns every Attribute currently marked as a class
// Attribute, in column order.
//
// The attributes slice is walked rather than the classAttrs map: Go map
// iteration order is randomised, which previously made the order of the
// result differ from call to call.
func (m *Mat64Instances) AllClassAttributes() []Attribute {
	ret := make([]Attribute, 0, len(m.classAttrs))
	for i, a := range m.attributes {
		if m.classAttrs[i] {
			ret = append(ret, a)
		}
	}
	return ret
}
// Get reads the matrix value at (row, as.position) and returns it packed
// into bytes by PackFloatToBytes.
func (m *Mat64Instances) Get(as AttributeSpec, row int) []byte {
	return PackFloatToBytes(m.Data.At(row, as.position))
}
// MapOverRows calls f once per row, in row order, passing the packed values
// of the requested Attributes and the row number.
//
// The byte buffers handed to f are allocated once and overwritten for every
// row, so f must copy anything it wants to keep. Iteration stops as soon as
// f returns false, and the error f returned alongside it becomes the result.
// An error returned together with true is not propagated.
func (m *Mat64Instances) MapOverRows(as []AttributeSpec, f func([][]byte, int) (bool, error)) error {
	// One reusable 8-byte buffer per requested Attribute.
	buffers := make([][]byte, len(as))
	for col := range buffers {
		buffers[col] = make([]byte, 8)
	}
	for row := 0; row < m.rows; row++ {
		for col, spec := range as {
			PackFloatToBytesInline(m.Data.At(row, spec.position), buffers[col])
		}
		if keepGoing, err := f(buffers, row); !keepGoing {
			return err
		}
	}
	return nil
}
// RowString is meant to print the values of a row; for now it only returns
// the row number in decimal.
// TODO: render the actual values of the row.
func (m *Mat64Instances) RowString(row int) string {
	return fmt.Sprint(row)
}
// Size reports the dimensions of the dataset: first the number of
// Attributes, then the number of rows.
func (m *Mat64Instances) Size() (int, int) {
	attrCount := len(m.attributes)
	return attrCount, m.rows
}
// String renders a human-readable summary of the dataset: a header with the
// row and Attribute counts, the list of Attributes (class Attributes are
// prefixed with "*"), and up to 30 rows of data followed by a note saying
// how many rows were left out.
func (m *Mat64Instances) String() string {
	const displayLimit = 30

	var out bytes.Buffer
	specs := ResolveAllAttributes(m)
	attrCount, rowCount := m.Size()

	// Header and Attribute listing
	fmt.Fprintf(&out, "Instances with %d row(s) %d attribute(s)\n", rowCount, attrCount)
	out.WriteString("Attributes: \n")
	for idx, spec := range specs {
		marker := "\t"
		if m.classAttrs[idx] {
			marker = "*\t"
		}
		fmt.Fprintf(&out, "%s%s\n", marker, spec.attr)
	}

	// Data rows, capped at displayLimit
	out.WriteString("\nData:\n")
	shown := rowCount
	if shown > displayLimit {
		shown = displayLimit
	}
	for row := 0; row < shown; row++ {
		out.WriteString("\t")
		for _, spec := range specs {
			fmt.Fprintf(&out, "%s ", spec.attr.GetStringFromSysVal(m.Get(spec, row)))
		}
		out.WriteString("\n")
	}

	if hidden := rowCount - shown; hidden != 0 {
		fmt.Fprintf(&out, "\t...\n%d row(s) undisplayed", hidden)
	} else {
		out.WriteString("All rows displayed")
	}
	return out.String()
}
================================================
FILE: base/mat_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"gonum.org/v1/gonum/mat"
"testing"
)
// TestInlineMat64Creation builds a Mat64Instances from a literal 4x3 matrix
// and exercises its accessors: Attribute listing, class Attributes, value
// lookup, Size, RowString, the lookup failure path and String.
func TestInlineMat64Creation(t *testing.T) {
Convey("Given a literal array...", t, func() {
// 4 rows by 3 columns, row-major.
X := mat.NewDense(4, 3, []float64{
1, 0, 1,
0, 1, 1,
0, 0, 0,
1, 1, 0,
})
inst := InstancesFromMat64(4, 3, X)
attrs := inst.AllAttributes()
Convey("Attributes should be well-defined...", func() {
So(len(attrs), ShouldEqual, 3)
})
Convey("No class variables set by default...", func() {
classAttrs := inst.AllClassAttributes()
So(len(classAttrs), ShouldEqual, 0)
})
Convey("Getting values should work...", func() {
as, err := inst.GetAttribute(attrs[0])
So(err, ShouldBeNil)
// Row 3, column 0 of the literal above is 1.
valBytes := inst.Get(as, 3)
val := UnpackBytesToFloat(valBytes)
So(val, ShouldAlmostEqual, 1.0)
})
Convey("Getting size should work...", func() {
attrLen, rows := inst.Size()
So(attrLen, ShouldEqual, 3)
So(rows, ShouldEqual, 4)
})
Convey("Getting row string should work...", func() {
So(inst.RowString(0), ShouldEqual, "0")
})
Convey("Getting attribute not in it should error...", func() {
// A second, wider dataset supplies an Attribute (its fourth column)
// that is then looked up in the three-column inst.
Y := mat.NewDense(1, 4, []float64{1, 2, 3, 4})
ins := InstancesFromMat64(1, 4, Y)
attr := ins.AllAttributes()
_, err := inst.GetAttribute(attr[3])
So(err.Error(), ShouldEqual, "Couldn't find a matching attribute")
})
Convey("Generate human-readable summary...", func() {
output := inst.String()
So(output, ShouldStartWith, "Instances with")
So(output, ShouldContainSubstring, "Attributes:")
So(output, ShouldContainSubstring, "Data:")
})
})
}
// TestStringWithExceedMaxRow checks that String reports undisplayed rows
// when the dataset has more rows (35) than String prints.
func TestStringWithExceedMaxRow(t *testing.T) {
	Convey("Given a long literal array...", t, func() {
		const rowCount = 35
		values := make([]float64, rowCount)
		dense := mat.NewDense(rowCount, 1, values)
		summary := InstancesFromMat64(rowCount, 1, dense).String()
		So(summary, ShouldStartWith, "Instances with")
		So(summary, ShouldContainSubstring, "Attributes:")
		So(summary, ShouldContainSubstring, "Data:")
		So(summary, ShouldContainSubstring, "undisplayed")
	})
}
================================================
FILE: base/serialize.go
================================================
package base
import (
"archive/tar"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"reflect"
)
// SerializationFormatVersion is the marker string written into the
// CLS_MANIFEST entry of classifier files and compared against when such a
// file is read back.
const (
SerializationFormatVersion = "golearn 1.0"
)
// FunctionalTarReader allows you to read anything in a tar file in any order, rather than just
// sequentially.
type FunctionalTarReader struct {
// Regenerate is called at the start of every lookup to obtain a
// tar.Reader to scan for the requested entry.
Regenerate func() *tar.Reader
}
// NewFunctionalTarReader builds a FunctionalTarReader around regenFunc,
// which it calls whenever it needs a tar.Reader positioned at the beginning
// of the file.
func NewFunctionalTarReader(regenFunc func() *tar.Reader) *FunctionalTarReader {
	reader := new(FunctionalTarReader)
	reader.Regenerate = regenFunc
	return reader
}
// GetNamedFile returns the contents of the entry called name in the tar
// file. The archive is scanned from the beginning via Regenerate, and if
// more than one entry has that name the first one encountered is returned.
//
// An error is returned when the archive cannot be read or when no entry
// matches. Reading fewer bytes than the header announces is only logged;
// reading more is reported as an error.
func (f *FunctionalTarReader) GetNamedFile(name string) ([]byte, error) {
	tr := f.Regenerate()
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		} else if err != nil {
			return nil, err
		}
		if hdr.Name != name {
			continue
		}

		ret, err := ioutil.ReadAll(tr)
		if err != nil {
			return nil, WrapError(err)
		}
		got := int64(len(ret))
		switch {
		case got < hdr.Size:
			// The read error is known to be nil here, so it is not logged.
			log.Printf("Size mismatch, got %d byte(s) for %s, expected %d", got, hdr.Name, hdr.Size)
		case got > hdr.Size:
			// "expected" is the size from the header, "got" is what was read
			// (the two arguments used to be swapped).
			return nil, WrapError(fmt.Errorf("Size mismatch, expected %d byte(s) for %s, got %d", hdr.Size, hdr.Name, got))
		}
		return ret, nil
	}
	return nil, WrapError(fmt.Errorf("Not found (looking for %s)", name))
}
// tarPrefix joins prefix and suffix with a "/" to form a tar entry name.
// An empty prefix yields suffix unchanged.
func tarPrefix(prefix string, suffix string) string {
	switch prefix {
	case "":
		return suffix
	default:
		return fmt.Sprintf("%s/%s", prefix, suffix)
	}
}
// ClassifierMetadataV1 is what gets written into METADATA
// in a classification file format.
type ClassifierMetadataV1 struct {
	// FormatVersion should always be 1 for this structure
	FormatVersion int `json:"format_version"`
	// Uses the classifier name (provided by the classifier)
	ClassifierName string `json:"classifier"`
	// ClassifierVersion is also provided by the classifier
	// and checks whether this version of GoLearn can read what's
	// been written.
	//
	// The tag used to be malformed (`json"classifier_version"`, missing the
	// colon), so encoding/json ignored it and stored the field under the key
	// "ClassifierVersion". UnmarshalJSON below still accepts that legacy key.
	ClassifierVersion string `json:"classifier_version"`
	// This is a custom metadata field, provided by the classifier
	ClassifierMetadata map[string]interface{} `json:"classifier_metadata"`
}

// UnmarshalJSON decodes METADATA written with either the current
// "classifier_version" key or the legacy "ClassifierVersion" key, so that
// files saved before the struct tag was corrected remain readable.
func (m *ClassifierMetadataV1) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, which prevents
	// json.Unmarshal from recursing back into this function.
	type plain ClassifierMetadataV1
	if err := json.Unmarshal(data, (*plain)(m)); err != nil {
		return err
	}
	if m.ClassifierVersion != "" {
		return nil
	}
	// Fall back to the key older files used. A failure here only means the
	// legacy key is absent or unusable, so the version is left empty.
	var legacy struct {
		ClassifierVersion string `json:"ClassifierVersion"`
	}
	if err := json.Unmarshal(data, &legacy); err == nil {
		m.ClassifierVersion = legacy.ClassifierVersion
	}
	return nil
}
// ClassifierDeserializer attaches helper functions useful for reading classifiers. (UNSTABLE).
type ClassifierDeserializer struct {
// NOTE(review): keep the field order stable; check for unkeyed composite
// literals of this type before reordering.
gzipReader io.Reader
// fileReader is the reader that Close closes.
fileReader io.ReadCloser
// tarReader looks up named entries; all Get...ForKey helpers go through it.
tarReader *FunctionalTarReader
// Metadata is the decoded METADATA entry of the file.
Metadata *ClassifierMetadataV1
}
// Prefix builds a TAR entry name of the form "prefix/suffix". When prefix is
// empty the suffix is returned on its own, without a leading slash.
func (c *ClassifierDeserializer) Prefix(prefix string, suffix string) string {
	if len(prefix) > 0 {
		return fmt.Sprintf("%s/%s", prefix, suffix)
	}
	return suffix
}
// ReadMetadataAtPrefix decodes the METADATA entry stored under prefix. When
// an error is returned, the metadata value must not be relied upon.
func (c *ClassifierDeserializer) ReadMetadataAtPrefix(prefix string) (ClassifierMetadataV1, error) {
	key := c.Prefix(prefix, "METADATA")
	metadata := ClassifierMetadataV1{}
	if err := c.GetJSONForKey(key, &metadata); err != nil {
		return metadata, err
	}
	return metadata, nil
}
// ReadSerializedClassifierStub is the counterpart of CreateSerializedClassifierStub.
// It's used inside SaveableClassifiers to read information from a previously saved
// model file.
//
// It opens filePath, verifies the CLS_MANIFEST entry against
// SerializationFormatVersion, decodes the top-level METADATA entry and checks
// that its format_version is 1. On success the caller owns the returned
// deserializer and must Close it; on failure the file is closed here.
func ReadSerializedClassifierStub(filePath string) (*ClassifierDeserializer, error) {
	f, err := os.Open(filePath)
	if err != nil {
		return nil, DescribeError("Can't open file", err)
	}
	// Until the deserializer is handed to the caller, this function owns the
	// file and has to release it on every error path.
	success := false
	defer func() {
		if !success {
			f.Close()
		}
	}()

	gzr, err := gzip.NewReader(f)
	if err != nil {
		return nil, DescribeError("Can't decompress", err)
	}

	// Every lookup rewinds the file and restarts decompression. Regenerate
	// has no way to report an error, so failures here are left to surface
	// when the returned reader is used.
	regenerateFunc := func() *tar.Reader {
		f.Seek(0, os.SEEK_SET)
		gzr.Reset(f)
		return tar.NewReader(gzr)
	}
	tz := NewFunctionalTarReader(regenerateFunc)

	// Check that the serialization format is right
	manifestBytes, err := tz.GetNamedFile("CLS_MANIFEST")
	if err != nil {
		return nil, DescribeError("Error reading CLS_MANIFEST", err)
	}
	if !reflect.DeepEqual(manifestBytes, []byte(SerializationFormatVersion)) {
		return nil, fmt.Errorf("Unsupported CLS_MANIFEST: %s", string(manifestBytes))
	}

	// Keyed fields: the previous positional literal stored the file in
	// gzipReader and the gzip reader in fileReader, so Close never closed
	// the underlying file.
	ret := &ClassifierDeserializer{
		gzipReader: gzr,
		fileReader: f,
		tarReader:  tz,
	}

	// Parse METADATA
	metadata, err := ret.ReadMetadataAtPrefix("")
	if err != nil {
		return nil, fmt.Errorf("Error whilst reading METADATA: %s", err)
	}

	// Check that we can understand this archive
	if metadata.FormatVersion != 1 {
		return nil, fmt.Errorf("METADATA: wrong format_version for this version of golearn")
	}
	ret.Metadata = &metadata

	success = true
	return ret, nil
}
// GetBytesForKey fetches the raw contents of the entry stored under key.
func (c *ClassifierDeserializer) GetBytesForKey(key string) ([]byte, error) {
	contents, err := c.tarReader.GetNamedFile(key)
	return contents, err
}
// GetStringForKey fetches the entry stored under key and returns its
// contents as a string. On failure the string is empty.
func (c *ClassifierDeserializer) GetStringForKey(key string) (string, error) {
	raw, err := c.GetBytesForKey(key)
	if err == nil {
		return string(raw), nil
	}
	return "", err
}
// GetJSONForKey fetches the entry stored under key and JSON-decodes it into
// v, which must be a pointer as required by json.Unmarshal.
func (c *ClassifierDeserializer) GetJSONForKey(key string, v interface{}) error {
	raw, err := c.GetBytesForKey(key)
	if err == nil {
		err = json.Unmarshal(raw, v)
	}
	return err
}
// GetInstancesForKey deserializes some instances stored in a classifier
// output file. key is passed to DeserializeInstancesFromTarReader as the
// entry-name prefix.
//
// On failure a literal nil FixedDataGrid is returned. Passing the
// *DenseInstances result straight through would wrap a nil pointer in a
// non-nil interface value, so a "grid != nil" check in the caller would
// wrongly succeed.
func (c *ClassifierDeserializer) GetInstancesForKey(key string) (FixedDataGrid, error) {
	inst, err := DeserializeInstancesFromTarReader(c.tarReader, key)
	if err != nil {
		return nil, err
	}
	return inst, nil
}
// GetU64ForKey fetches the entry stored under key and decodes it as a
// uint64 using UnpackBytesToU64. On failure the value is 0.
func (c *ClassifierDeserializer) GetU64ForKey(key string) (uint64, error) {
	raw, err := c.GetBytesForKey(key)
	if err == nil {
		return UnpackBytesToU64(raw), nil
	}
	return 0, err
}
// GetAttributeForKey fetches the entry stored under key and deserializes it
// into an Attribute. Both read and decode failures are wrapped by WrapError.
func (c *ClassifierDeserializer) GetAttributeForKey(key string) (Attribute, error) {
	var attr Attribute
	raw, err := c.GetBytesForKey(key)
	if err == nil {
		attr, err = DeserializeAttribute(raw)
	}
	if err != nil {
		return nil, WrapError(err)
	}
	return attr, nil
}
// GetAttributesForKey reads back an Attribute list stored under key. The
// number of Attributes comes from the "ATTR_COUNT" entry below key, and the
// Attributes themselves from the entries "0", "1", ... below key.
func (c *ClassifierDeserializer) GetAttributesForKey(key string) ([]Attribute, error) {
	count, err := c.GetU64ForKey(c.Prefix(key, "ATTR_COUNT"))
	if err != nil {
		return nil, DescribeError("Unable to read ATTR_COUNT", err)
	}

	attrs := make([]Attribute, count)
	for idx := 0; idx < len(attrs); idx++ {
		entry := c.Prefix(key, fmt.Sprintf("%d", idx))
		attr, err := c.GetAttributeForKey(entry)
		if err != nil {
			return nil, DescribeError("Unable to read Attribute", err)
		}
		attrs[idx] = attr
	}
	return attrs, nil
}
// Close releases the deserializer by closing its fileReader. Any error
// returned by that Close call is discarded.
func (c *ClassifierDeserializer) Close() {
c.fileReader.Close()
}
// ClassifierSerializer is an object used by SaveableClassifiers.
// It holds the nested writers that entries of a classifier file are
// written through.
type ClassifierSerializer struct {
// gzipWriter is flushed after each entry and closed by Close.
gzipWriter *gzip.Writer
// fileWriter is synced after each entry and closed by Close.
fileWriter *os.File
// tarWriter receives the individual named entries.
tarWriter *tar.Writer
// NOTE(review): f and filePath are not set by
// CreateSerializedClassifierStub — confirm whether they are still needed.
f *os.File
filePath string
}
// Close finalizes the Classifier serialization session by flushing and
// closing the tar layer, then the gzip layer, then syncing and closing the
// file.
//
// Every step is attempted even if an earlier one fails, so a failure while
// flushing no longer leaves the file open. The first error encountered is
// the one returned.
func (c *ClassifierSerializer) Close() error {
	var firstErr error
	// note keeps only the first failure; later steps still run so that each
	// layer gets released.
	note := func(what string, err error) {
		if err != nil && firstErr == nil {
			firstErr = fmt.Errorf("%s: %s", what, err)
		}
	}

	note("Could not flush tar", c.tarWriter.Flush())
	note("Could not close tar", c.tarWriter.Close())
	note("Could not flush gz", c.gzipWriter.Flush())
	note("Could not close gz", c.gzipWriter.Close())
	note("Could not sync file writer", c.fileWriter.Sync())
	note("Could not close file writer", c.fileWriter.Close())
	return firstErr
}
// WriteBytesForKey creates a new entry called key in the serializer file
// holding the user-defined bytes b, then flushes the tar and gzip layers and
// syncs the file.
//
// Flush and sync failures are reported instead of being dropped, matching
// how Close treats the same calls.
func (c *ClassifierSerializer) WriteBytesForKey(key string, b []byte) error {
	// Write header for key
	hdr := &tar.Header{
		Name: key,
		Size: int64(len(b)),
	}
	if err := c.tarWriter.WriteHeader(hdr); err != nil {
		return fmt.Errorf("Could not write header for '%s': %s", key, err)
	}

	// Write data
	if _, err := c.tarWriter.Write(b); err != nil {
		return fmt.Errorf("Could not write data for '%s': %s", key, err)
	}

	// Push the entry all the way down to the file
	if err := c.tarWriter.Flush(); err != nil {
		return fmt.Errorf("Could not flush tar for '%s': %s", key, err)
	}
	if err := c.gzipWriter.Flush(); err != nil {
		return fmt.Errorf("Could not flush gz for '%s': %s", key, err)
	}
	if err := c.fileWriter.Sync(); err != nil {
		return fmt.Errorf("Could not sync file writer for '%s': %s", key, err)
	}
	return nil
}
// WriteU64ForKey stores v under key, encoded with PackU64ToBytes.
func (c *ClassifierSerializer) WriteU64ForKey(key string, v uint64) error {
	return c.WriteBytesForKey(key, PackU64ToBytes(v))
}
// WriteJSONForKey stores the JSON encoding of v under key. A marshalling
// failure is returned as-is and nothing is written.
func (c *ClassifierSerializer) WriteJSONForKey(key string, v interface{}) error {
	encoded, err := json.Marshal(v)
	if err == nil {
		err = c.WriteBytesForKey(key, encoded)
	}
	return err
}
// WriteAttributeForKey stores the serialized form of Attribute a under key.
// A serialization failure is wrapped with WrapError and nothing is written.
func (c *ClassifierSerializer) WriteAttributeForKey(key string, a Attribute) error {
	payload, serializeErr := SerializeAttribute(a)
	if serializeErr != nil {
		return WrapError(serializeErr)
	}
	return c.WriteBytesForKey(key, payload)
}
// WriteAttributesForKey stores a list of Attributes below key: the length of
// the list goes into the "ATTR_COUNT" entry, and each Attribute into an
// entry named after its index ("0", "1", ...).
func (c *ClassifierSerializer) WriteAttributesForKey(key string, attrs []Attribute) error {
	if err := c.WriteU64ForKey(c.Prefix(key, "ATTR_COUNT"), uint64(len(attrs))); err != nil {
		return DescribeError("Unable to write ATTR_COUNT", err)
	}
	for idx := range attrs {
		entry := c.Prefix(key, fmt.Sprintf("%d", idx))
		if err := c.WriteAttributeForKey(entry, attrs[idx]); err != nil {
			return DescribeError("Unable to write Attribute", err)
		}
	}
	return nil
}
// WriteInstancesForKey creates new entries in the file containing some
// training instances. key is passed to SerializeInstancesToTarWriter as the
// entry-name prefix, and includeData is forwarded unchanged.
func (c *ClassifierSerializer) WriteInstancesForKey(key string, g FixedDataGrid, includeData bool) error {
	// A leftover fmt.Sprintf("%v", c) whose result was discarded has been
	// removed from here.
	return SerializeInstancesToTarWriter(g, c.tarWriter, key, includeData)
}
// Prefix builds a TAR entry name of the form "prefix/suffix". When prefix is
// empty the suffix is returned on its own, without a leading slash.
func (c *ClassifierSerializer) Prefix(prefix string, suffix string) string {
	if len(prefix) > 0 {
		return fmt.Sprintf("%s/%s", prefix, suffix)
	}
	return suffix
}
// WriteMetadataAtPrefix JSON-encodes metadata into the METADATA entry that
// lives under prefix.
func (c *ClassifierSerializer) WriteMetadataAtPrefix(prefix string, metadata ClassifierMetadataV1) error {
	key := c.Prefix(prefix, "METADATA")
	return c.WriteJSONForKey(key, &metadata)
}
// CreateSerializedClassifierStub generates a file to serialize into
// and writes the CLS_MANIFEST and METADATA entries.
//
// filePath is created if necessary and truncated otherwise. On success the
// caller owns the returned serializer and must Close it; on failure the file
// handle is closed before returning.
func CreateSerializedClassifierStub(filePath string, metadata ClassifierMetadataV1) (*ClassifierSerializer, error) {
	// Open the filePath
	f, err := os.OpenFile(filePath, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0600)
	if err != nil {
		return nil, err
	}

	gzWriter := gzip.NewWriter(f)
	tw := tar.NewWriter(gzWriter)
	ret := &ClassifierSerializer{
		gzipWriter: gzWriter,
		fileWriter: f,
		tarWriter:  tw,
	}

	// fail releases the file handle so that error paths do not leak it.
	fail := func(err error) (*ClassifierSerializer, error) {
		f.Close()
		return nil, err
	}

	// Write the MANIFEST entry
	hdr := &tar.Header{
		Name: "CLS_MANIFEST",
		Size: int64(len(SerializationFormatVersion)),
	}
	if err := tw.WriteHeader(hdr); err != nil {
		return fail(fmt.Errorf("Could not write CLS_MANIFEST header: %s", err))
	}
	if _, err := tw.Write([]byte(SerializationFormatVersion)); err != nil {
		return fail(fmt.Errorf("Could not write CLS_MANIFEST contents: %s", err))
	}

	// Write the METADATA entry. This can fail while encoding or while
	// writing, so the message no longer claims a JSON marshal error.
	if err := ret.WriteMetadataAtPrefix("", metadata); err != nil {
		return fail(fmt.Errorf("Could not write METADATA: %s", err))
	}

	return ret, nil
}
================================================
FILE: base/serialize_attributes.go
================================================
package base
import (
"archive/tar"
"encoding/json"
"fmt"
)
// writeAttributesToFilePart JSON-encodes attrs and stores the result in f as
// a tar entry called name.
func writeAttributesToFilePart(attrs []Attribute, f *tar.Writer, name string) error {
	// Marshal first: the header needs the final size.
	payload, err := json.Marshal(attrs)
	if err != nil {
		return err
	}

	header := tar.Header{
		Name: name,
		Size: int64(len(payload)),
	}
	if err = f.WriteHeader(&header); err != nil {
		return err
	}

	_, err = f.Write(payload)
	return err
}
// MarshalAttribute turns an Attribute into a generic JSON map by running its
// MarshalJSON output back through json.Unmarshal.
func MarshalAttribute(a Attribute) (map[string]interface{}, error) {
	raw, err := a.MarshalJSON()
	if err != nil {
		return nil, err
	}

	fields := make(map[string]interface{})
	if err = json.Unmarshal(raw, &fields); err != nil {
		return nil, err
	}
	return fields, nil
}
// SerializeAttribute returns the JSON encoding of attr, as produced by
// json.Marshal.
func SerializeAttribute(attr Attribute) ([]byte, error) {
	encoded, err := json.Marshal(attr)
	if err != nil {
		return nil, err
	}
	return encoded, nil
}
// DeserializeAttribute rebuilds an Attribute from its JSON form.
//
// The JSON object carries a "type" ("binary", "float" or "categorical"), a
// "name", and an "attr" payload. The type selects the concrete Attribute,
// the payload is handed to its UnmarshalJSON, and the name is applied last.
// Any other type yields an error.
func DeserializeAttribute(data []byte) (Attribute, error) {
	type JSONAttribute struct {
		Type string          `json:"type"`
		Name string          `json:"name"`
		Attr json.RawMessage `json:"attr"`
	}

	var rawAttr JSONAttribute
	if err := json.Unmarshal(data, &rawAttr); err != nil {
		return nil, err
	}

	// Pick the concrete implementation from the declared type
	var attr Attribute
	switch rawAttr.Type {
	case "binary":
		attr = new(BinaryAttribute)
	case "float":
		attr = new(FloatAttribute)
	case "categorical":
		attr = new(CategoricalAttribute)
	default:
		return nil, fmt.Errorf("Unrecognised Attribute format: %s", rawAttr.Type)
	}

	if err := attr.UnmarshalJSON(rawAttr.Attr); err != nil {
		return nil, fmt.Errorf("Can't deserialize: %s (error: %s)", rawAttr, err)
	}
	attr.SetName(rawAttr.Name)
	return attr, nil
}
// DeserializeAttributes decodes a JSON array of serialized Attributes into a
// slice of Attribute, one per array element and in the same order. The
// first element that fails to deserialize aborts the whole call.
func DeserializeAttributes(data []byte) ([]Attribute, error) {
	// Split the array without decoding the elements yet
	var rawAttrs []json.RawMessage
	if err := json.Unmarshal(data, &rawAttrs); err != nil {
		return nil, fmt.Errorf("Failed to deserialize attributes: %v", err)
	}

	attrs := make([]Attribute, 0, len(rawAttrs))
	for _, raw := range rawAttrs {
		attr, err := DeserializeAttribute(raw)
		if err != nil {
			return nil, err
		}
		attrs = append(attrs, attr)
	}
	return attrs, nil
}
// ReplaceDeserializedAttributeWithVersionFromInstances looks through the
// Attributes of matchingWith for the first one that Equals the independently
// deserialized Attribute and returns it. An error is returned when there is
// no such Attribute.
func ReplaceDeserializedAttributeWithVersionFromInstances(deserialized Attribute, matchingWith FixedDataGrid) (Attribute, error) {
	candidates := matchingWith.AllAttributes()
	for idx := range candidates {
		if candidates[idx].Equals(deserialized) {
			return candidates[idx], nil
		}
	}
	return nil, WrapError(fmt.Errorf("Unable to match %v in %v", deserialized, matchingWith))
}
// ReplaceDeserializedAttributesWithVersionsFromInstances maps each
// independently loaded Attribute to its equal counterpart in matchingWith,
// preserving order. The call fails as soon as one Attribute has no match.
func ReplaceDeserializedAttributesWithVersionsFromInstances(deserialized []Attribute, matchingWith FixedDataGrid) ([]Attribute, error) {
	matched := make([]Attribute, 0, len(deserialized))
	for _, attr := range deserialized {
		found, err := ReplaceDeserializedAttributeWithVersionFromInstances(attr, matchingWith)
		if err != nil {
			return nil, WrapError(err)
		}
		matched = append(matched, found)
	}
	return matched, nil
}
================================================
FILE: base/serialize_instances.go
================================================
package base
import (
"archive/tar"
"compress/gzip"
"encoding/csv"
"fmt"
"io"
"os"
"reflect"
"runtime"
)
// SerializeInstancesToFile writes inst to the file at path using
// SerializeInstances.
//
// The file is created if it does not exist and truncated if it does, so a
// shorter archive never keeps stale bytes from a previous, longer file. The
// handle is closed on every path, and a failure to sync or close is
// reported.
func SerializeInstancesToFile(inst FixedDataGrid, path string) error {
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return err
	}
	if err := SerializeInstances(inst, f); err != nil {
		f.Close()
		return err
	}
	if err := f.Sync(); err != nil {
		f.Close()
		return fmt.Errorf("Couldn't flush file: %s", err)
	}
	return f.Close()
}
// SerializeInstancesToCSV writes a FixedDataGrid to the file at path in CSV
// format, using SerializeInstancesToCSVStream.
//
// The file is created if it does not exist and truncated if it does, so old
// rows never survive past the end of the new output. The handle is closed on
// every path, and a failure to sync or close is reported.
func SerializeInstancesToCSV(inst FixedDataGrid, path string) error {
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return err
	}
	if err := SerializeInstancesToCSVStream(inst, f); err != nil {
		f.Close()
		return err
	}
	if err := f.Sync(); err != nil {
		f.Close()
		return fmt.Errorf("Couldn't flush file: %s", err)
	}
	return f.Close()
}
// SerializeInstancesToCSVStream outputs a FixedDataGrid into a CSV file
// format, via the io.Writer stream.
//
// The first record holds the Attribute names, with the non-class Attributes
// first and the class Attributes after them; every following record is one
// row in that same column order. Errors from the CSV writer and from
// MapOverRows are returned rather than ignored.
func SerializeInstancesToCSVStream(inst FixedDataGrid, f io.Writer) error {
	// Create the CSV writer
	w := csv.NewWriter(f)

	colCount, _ := inst.Size()

	// Column order: regular Attributes, then class Attributes
	normalAttrs := NonClassAttributes(inst)
	classAttrs := inst.AllClassAttributes()
	allAttrs := make([]Attribute, colCount)
	n := copy(allAttrs, normalAttrs)
	copy(allAttrs[n:], classAttrs)

	// Write out Attribute headers
	headerRow := make([]string, colCount)
	for i, v := range allAttrs {
		headerRow[i] = v.GetName()
	}
	if err := w.Write(headerRow); err != nil {
		return err
	}

	specs := ResolveAttributes(inst, allAttrs)
	curRow := make([]string, colCount)
	mapErr := inst.MapOverRows(specs, func(row [][]byte, rowNo int) (bool, error) {
		for i, v := range row {
			curRow[i] = allAttrs[i].GetStringFromSysVal(v)
		}
		if err := w.Write(curRow); err != nil {
			return false, err
		}
		return true, nil
	})

	// Flush whatever was buffered, even when the row iteration failed.
	w.Flush()
	if mapErr != nil {
		return mapErr
	}
	// Flush has no return value; its outcome is reported by Error.
	return w.Error()
}
// DeserializeInstancesFromTarReader returns DenseInstances from a
// FunctionalTarReader with the name prefix.
//
// prefix is prepended to the entry names as-is (no separator is inserted).
// The entries read are MANIFEST, DIMS, CATTRS, ATTRS and DATA. Regular
// Attributes are added to the result first, class Attributes after them, and
// the DATA entry is read row by row in that same order.
func DeserializeInstancesFromTarReader(tr *FunctionalTarReader, prefix string) (ret *DenseInstances, err error) {
	p := func(n string) string {
		return fmt.Sprintf("%s%s", prefix, n)
	}

	// Retrieve the MANIFEST and verify
	manifestBytes, err := tr.GetNamedFile(p("MANIFEST"))
	if err != nil {
		return nil, err
	}
	if !reflect.DeepEqual(manifestBytes, []byte(SerializationFormatVersion)) {
		return nil, fmt.Errorf("Unsupported MANIFEST: %s", string(manifestBytes))
	}

	// Get the size
	sizeBytes, err := tr.GetNamedFile(p("DIMS"))
	if err != nil {
		return nil, WrapError(fmt.Errorf("Unable to read DIMS: %v", err))
	}
	if len(sizeBytes) < 16 {
		return nil, WrapError(fmt.Errorf("DIMS: must be 16 bytes"))
	}
	attrCount := int(UnpackBytesToU64(sizeBytes[0:8]))
	rowCount := int(UnpackBytesToU64(sizeBytes[8:]))

	// Unmarshal the Attributes
	attrBytes, err := tr.GetNamedFile(p("CATTRS"))
	if err != nil {
		return nil, DescribeError("Unable to read CATTRS", err)
	}
	cAttrs, err := DeserializeAttributes(attrBytes)
	if err != nil {
		return nil, DescribeError("Class Attribute deserialization error", err)
	}
	attrBytes, err = tr.GetNamedFile(p("ATTRS"))
	if err != nil {
		return nil, DescribeError("Unable to read ATTRS", err)
	}
	normalAttrs, err := DeserializeAttributes(attrBytes)
	if err != nil {
		return nil, DescribeError("Unable to deserialize normal attributes", err)
	}

	// A DIMS entry that disagrees with the Attribute lists means the archive
	// is corrupt. Rejecting it here avoids an out-of-range index or nil
	// Attributes further down.
	if found := len(normalAttrs) + len(cAttrs); found != attrCount {
		return nil, WrapError(fmt.Errorf("DIMS: expected %d Attribute(s), but ATTRS and CATTRS contain %d", attrCount, found))
	}

	// Create the return instances
	ret = NewDenseInstances()

	// Normal Attributes first, class Attributes on the end
	allAttributes := make([]Attribute, 0, attrCount)
	for _, v := range normalAttrs {
		ret.AddAttribute(v)
		allAttributes = append(allAttributes, v)
	}
	for _, v := range cAttrs {
		ret.AddAttribute(v)
		if err = ret.AddClassAttribute(v); err != nil {
			return nil, DescribeError(fmt.Sprintf("Could not set Attribute '%s' as a class Attribute", v), err)
		}
		allAttributes = append(allAttributes, v)
	}

	// Allocate memory
	if err = ret.Extend(rowCount); err != nil {
		return nil, WrapError(fmt.Errorf("Could not allocate memory: %s", err))
	}

	// Seek through the TAR file until we get to the DATA section
	reader := tr.Regenerate()
	for {
		hdr, err := reader.Next()
		if err == io.EOF {
			return nil, WrapError(fmt.Errorf("DATA section missing!"))
		} else if err != nil {
			return nil, WrapError(fmt.Errorf("Error seeking to DATA section: %s", err))
		}
		if hdr.Name == p("DATA") {
			break
		}
	}

	// Resolve AttributeSpecs
	specs := ResolveAttributes(ret, allAttributes)

	// Finally, read the values out of the data section. io.ReadFull is used
	// because a single Read may legitimately return fewer bytes than asked
	// for, which used to be reported as a corrupt row.
	for i := 0; i < rowCount; i++ {
		for j, s := range specs {
			r := ret.Get(s, i)
			if _, err := io.ReadFull(reader, r); err != nil {
				return nil, WrapError(fmt.Errorf("Read error in data section (at row %d from %d, attr %d from %d): %s", i, rowCount, j, len(specs), err))
			}
			ret.Set(s, i, r)
		}
	}

	return ret, nil
}
// DeserializeInstances returns a DenseInstances using a given io.ReadSeeker.
//
// f must contain the gzip-compressed tar archive written by
// SerializeInstances. It is rewound to the start every time an entry is
// looked up, which is why a plain io.Reader is not enough.
//
// Panics raised while deserializing are converted into the returned error,
// except for runtime errors, which are re-raised.
func DeserializeInstances(f io.ReadSeeker) (ret *DenseInstances, err error) {
	// Recovery function
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		if _, ok := r.(runtime.Error); ok {
			panic(r)
		}
		// Not every panic value is an error; an unchecked assertion here
		// would itself panic.
		if e, ok := r.(error); ok {
			err = e
		} else {
			err = fmt.Errorf("%v", r)
		}
	}()

	// Open the .gz layer
	gzReader, err := gzip.NewReader(f)
	if err != nil {
		return nil, WrapError(err)
	}

	// Regenerate cannot return an error, so rewind failures are raised as
	// panics and turned back into an error by the deferred function above.
	regenerateTarReader := func() *tar.Reader {
		if _, err := f.Seek(0, io.SeekStart); err != nil {
			panic(WrapError(err))
		}
		if err := gzReader.Reset(f); err != nil {
			panic(WrapError(err))
		}
		return tar.NewReader(gzReader)
	}

	tr := NewFunctionalTarReader(regenerateTarReader)

	ret, deSerializeErr := DeserializeInstancesFromTarReader(tr, "")
	closeErr := gzReader.Close()
	// The deserialization error is the root cause, so it takes precedence
	// over a failure to close the gzip stream.
	if deSerializeErr != nil {
		return nil, deSerializeErr
	}
	if closeErr != nil {
		return ret, fmt.Errorf("Error closing gzip stream: %s", closeErr)
	}
	return ret, nil
}
// SerializeInstances stores a FixedDataGrid into an efficient format to the
// given io.Writer stream: a gzip-compressed tar archive written by
// SerializeInstancesToTarWriter with an empty prefix and the data included.
//
// If serialization itself fails, that error is returned. Previously it could
// be hidden behind the follow-on error from flushing or closing the
// half-written archive.
func SerializeInstances(inst FixedDataGrid, f io.Writer) error {
	// Create a .tar.gz container
	gzWriter := gzip.NewWriter(f)
	tw := tar.NewWriter(gzWriter)

	if err := SerializeInstancesToTarWriter(inst, tw, "", true); err != nil {
		// Best effort: still close both layers, but report the root cause.
		tw.Close()
		gzWriter.Close()
		return err
	}

	// Finally, close and flush the various levels
	if err := tw.Flush(); err != nil {
		return fmt.Errorf("Could not flush tar: %s", err)
	}
	if err := tw.Close(); err != nil {
		return fmt.Errorf("Could not close tar: %s", err)
	}
	if err := gzWriter.Flush(); err != nil {
		return fmt.Errorf("Could not flush gz: %s", err)
	}
	if err := gzWriter.Close(); err != nil {
		return fmt.Errorf("Could not close gz: %s", err)
	}
	return nil
}
// SerializeInstancesToTarWriter stores a FixedDataGrid into an efficient form given a tar.Writer.
//
// The entries are written in this order, each name being prefix followed
// directly by the entry name (no separator is inserted): MANIFEST, DIMS,
// CATTRS, ATTRS, DATA. When includeData is false the function returns right
// after the DATA header, without writing any row data.
func SerializeInstancesToTarWriter(inst FixedDataGrid, tw *tar.Writer, prefix string, includeData bool) error {
var hdr *tar.Header
// p builds the entry name for this prefix.
p := func(n string) string {
return fmt.Sprintf("%s%s", prefix, n)
}
// Write the MANIFEST entry
hdr = &tar.Header{
Name: p("MANIFEST"),
Size: int64(len(SerializationFormatVersion)),
}
if err := tw.WriteHeader(hdr); err != nil {
return fmt.Errorf("Could not write MANIFEST header: %s", err)
}
if _, err := tw.Write([]byte(SerializationFormatVersion)); err != nil {
return fmt.Errorf("Could not write MANIFEST contents: %s", err)
}
tw.Flush()
// Now write the dimensions of the dataset
// DIMS is 16 bytes: the Attribute count followed by the row count, each
// packed by PackU64ToBytes.
attrCount, rowCount := inst.Size()
hdr = &tar.Header{
Name: p("DIMS"),
Size: 16,
}
if err := tw.WriteHeader(hdr); err != nil {
return fmt.Errorf("Could not write DIMS header: %s", err)
}
if _, err := tw.Write(PackU64ToBytes(uint64(attrCount))); err != nil {
return fmt.Errorf("Could not write DIMS (attrCount): %s", err)
}
if _, err := tw.Write(PackU64ToBytes(uint64(rowCount))); err != nil {
return fmt.Errorf("Could not write DIMS (rowCount): %s", err)
}
// Write the ATTRIBUTES files
classAttrs := inst.AllClassAttributes()
normalAttrs := NonClassAttributes(inst)
if err := writeAttributesToFilePart(classAttrs, tw, p("CATTRS")); err != nil {
return fmt.Errorf("Could not write CATTRS: %s", err)
}
if err := writeAttributesToFilePart(normalAttrs, tw, p("ATTRS")); err != nil {
return fmt.Errorf("Could not write ATTRS: %s", err)
}
// Data must be written out in the same order as the Attributes
// That order is: regular Attributes first, class Attributes after them.
allAttrs := make([]Attribute, attrCount)
normCount := copy(allAttrs, normalAttrs)
for i, v := range classAttrs {
allAttrs[normCount+i] = v
}
allSpecs := ResolveAttributes(inst, allAttrs)
if len(allSpecs) != len(allAttrs) {
return WrapError(fmt.Errorf("Error resolving all Attributes: resolved %d, expected %d", len(allSpecs), len(allAttrs)))
}
// First, estimate the amount of data we'll need...
// The tar header must carry the size up front, so the rows are walked
// once just to add up the byte lengths. The result of this MapOverRows
// call is not checked.
dataLength := int64(0)
inst.MapOverRows(allSpecs, func(val [][]byte, row int) (bool, error) {
for _, v := range val {
dataLength += int64(len(v))
}
return true, nil
})
// Then write the header
hdr = &tar.Header{
Name: p("DATA"),
Size: dataLength,
}
if err := tw.WriteHeader(hdr); err != nil {
return fmt.Errorf("Could not write DATA: %s", err)
}
// NOTE(review): the result of this Flush is deliberately left unchecked
// here; archive/tar appears to report an error when Flush is called before
// the announced entry data has been written — confirm before adding a check.
tw.Flush()
if !includeData {
// NOTE(review): the DATA header above still announces dataLength bytes
// even though none are written in this case — confirm that callers only
// rely on this when it is safe to do so.
return nil
}
// Then write the actual data
writtenLength := int64(0)
if err := inst.MapOverRows(allSpecs, func(val [][]byte, row int) (bool, error) {
for _, v := range val {
wl, err := tw.Write(v)
writtenLength += int64(wl)
if err != nil {
return false, err
}
}
return true, nil
}); err != nil {
return err
}
// Guard against the two passes over the rows disagreeing on the size.
if writtenLength != dataLength {
return fmt.Errorf("Could not write DATA: changed size from %v to %v", dataLength, writtenLength)
}
tw.Flush()
return nil
}
================================================
FILE: base/serialize_test.go
================================================
package base
import (
"archive/tar"
"compress/gzip"
"fmt"
. "github.com/smartystreets/goconvey/convey"
"io"
"io/ioutil"
"testing"
)
// TestSerializeToCSV round-trips the iris dataset through
// SerializeInstancesToCSV and ParseCSVToInstances and checks that the result
// equals the original instances.
func TestSerializeToCSV(t *testing.T) {
Convey("Reading some instances...", t, func() {
inst, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
So(err, ShouldBeNil)
Convey("Saving the instances to CSV...", func() {
// The temporary file only supplies a path; SerializeInstancesToCSV opens
// it again by name.
f, err := ioutil.TempFile("", "instTmpCSV")
So(err, ShouldBeNil)
err = SerializeInstancesToCSV(inst, f.Name())
So(err, ShouldBeNil)
Convey("What's written out should match what's read in", func() {
dinst, err := ParseCSVToInstances(f.Name(), true)
So(err, ShouldBeNil)
So(InstancesAreEqual(inst, dinst), ShouldBeTrue)
})
})
})
}
// TestCreateAndReadClassifierStub writes a classifier stub carrying example
// metadata with CreateSerializedClassifierStub, reads it back with
// ReadSerializedClassifierStub and checks every metadata field.
func TestCreateAndReadClassifierStub(t *testing.T) {
Convey("Creating a classifier stub...", t, func() {
exampleClassifierMetadata := make(map[string]interface{})
exampleClassifierMetadata["num_trees"] = 4
metadata := ClassifierMetadataV1{
FormatVersion: 1,
ClassifierName: "test",
ClassifierVersion: "1",
ClassifierMetadata: exampleClassifierMetadata,
}
Convey("Saving the classifier...", func() {
// The temporary file only supplies a path; the stub is created by name.
f, err := ioutil.TempFile("", "classTmpF")
So(err, ShouldBeNil)
serializer, err := CreateSerializedClassifierStub(f.Name(), metadata)
So(err, ShouldBeNil)
// Close finalizes the archive so that it can be read back below.
err = serializer.Close()
So(err, ShouldBeNil)
Convey("Should be able to read the information back...", func() {
reader, err := ReadSerializedClassifierStub(f.Name())
So(err, ShouldBeNil)
So(reader, ShouldNotBeNil)
So(reader.Metadata.FormatVersion, ShouldEqual, 1)
So(reader.Metadata.ClassifierName, ShouldEqual, "test")
So(reader.Metadata.ClassifierVersion, ShouldEqual, "1")
So(reader.Metadata.ClassifierMetadata["num_trees"], ShouldEqual, 4)
})
})
})
}
// TestSerializeToFile serializes instances into a temporary file, checks
// that the resulting gzip'd tar archive contains the expected entries
// (MANIFEST, CATTRS, ATTRS, DATA, DIMS) and that the instances can be
// deserialized back into an equal set.
func TestSerializeToFile(t *testing.T) {
	Convey("Reading some instances...", t, func() {
		inst, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		Convey("Dumping to file...", func() {
			f, err := ioutil.TempFile("", "instTmpF")
			So(err, ShouldBeNil)
			err = SerializeInstances(inst, f)
			So(err, ShouldBeNil)
			// Rewind so the archive can be read back from the start.
			_, err = f.Seek(0, 0)
			So(err, ShouldBeNil)

			Convey("Contents of the archive should be right...", func() {
				gzr, err := gzip.NewReader(f)
				So(err, ShouldBeNil)
				tr := tar.NewReader(gzr)

				classAttrsPresent := false
				manifestPresent := false
				regularAttrsPresent := false
				dataPresent := false
				dimsPresent := false
				readBytes := make([]byte, len([]byte(SerializationFormatVersion)))

				for {
					hdr, err := tr.Next()
					if err == io.EOF {
						break
					}
					So(err, ShouldBeNil)

					switch hdr.Name {
					case "MANIFEST":
						// A single Read may legally return fewer bytes than
						// requested; ReadFull fills the buffer or reports why not.
						_, readErr := io.ReadFull(tr, readBytes)
						So(readErr, ShouldBeNil)
						manifestPresent = true
					case "CATTRS":
						classAttrsPresent = true
					case "ATTRS":
						regularAttrsPresent = true
					case "DATA":
						dataPresent = true
					case "DIMS":
						dimsPresent = true
					default:
						fmt.Printf("Unknown file: %s\n", hdr.Name)
					}
				}

				Convey("MANIFEST should be present", func() {
					So(manifestPresent, ShouldBeTrue)
					Convey("MANIFEST should be right...", func() {
						So(readBytes, ShouldResemble, []byte(SerializationFormatVersion))
					})
					Convey("DATA should be present", func() {
						So(dataPresent, ShouldBeTrue)
					})
					Convey("ATTRS should be present", func() {
						So(regularAttrsPresent, ShouldBeTrue)
					})
					Convey("CATTRS should be present", func() {
						So(classAttrsPresent, ShouldBeTrue)
					})
					Convey("DIMS should be present", func() {
						So(dimsPresent, ShouldBeTrue)
					})
				})

				Convey("Should be able to reconstruct...", func() {
					// The archive walk above consumed the file; rewind again.
					_, seekErr := f.Seek(0, 0)
					So(seekErr, ShouldBeNil)
					dinst, err := DeserializeInstances(f)
					So(err, ShouldBeNil)
					So(InstancesAreEqual(inst, dinst), ShouldBeTrue)
				})
			})
		})
	})
}
================================================
FILE: base/sort.go
================================================
package base
import (
"bytes"
"encoding/binary"
)
// sortXorOp returns a copy of b in which the most significant bit of the
// first byte has been flipped. The input slice is left untouched.
// b must contain at least one byte.
func sortXorOp(b []byte) []byte {
	flipped := append(make([]byte, 0, len(b)), b...)
	flipped[0] = flipped[0] ^ 0x80
	return flipped
}
// sortSpec is one instruction produced by createSortSpec: a pair of row
// numbers. Sort applies each pair as a row swap.
type sortSpec struct {
	r1, r2 int
}
// createSortSpec computes the list of row swaps (sortSpecs) which, applied
// in order, arrange the rows of inst in ascending order of the given
// Attributes.
//
// Each row is turned into one byte string by concatenating the raw bytes of
// the requested Attributes; those byte strings are then ordered with a
// byte-wise radix sort, and the resulting permutation is converted to swaps.
// attrsArg is copied, so the caller's slice is not modified.
func createSortSpec(inst FixedDataGrid, attrsArg []AttributeSpec) []sortSpec {
attrs := make([]AttributeSpec, len(attrsArg))
copy(attrs, attrsArg)
// Reverse attribute order to be more intuitive
for i, j := 0, len(attrs)-1; i < j; i, j = i+1, j-1 {
attrs[i], attrs[j] = attrs[j], attrs[i]
}
_, rows := inst.Size()
ret := make([]sortSpec, 0)
// Scratch buffer used to assemble each row's key bytes
buf := bytes.NewBuffer(nil)
// ds[k] holds a row's key bytes, rs[k] the original number of that row;
// the two slices are always permuted together.
ds := make([][]byte, rows)
rs := make([]int, rows)
rowSize := 0
inst.MapOverRows(attrs, func(row [][]byte, rowNo int) (bool, error) {
if rowSize == 0 {
// First row seen: the key length is the total byte length of its values
for _, r := range row {
rowSize += len(r)
}
}
byteBuf := make([]byte, rowSize)
for i, r := range row {
if i == 0 {
// The first value (after the reversal above) has its top bit
// flipped by sortXorOp before being written.
// NOTE(review): the errors from binary.Write are ignored here.
binary.Write(buf, binary.LittleEndian, sortXorOp(r))
} else {
binary.Write(buf, binary.LittleEndian, r)
}
}
// Drain what was just written into this row's own key slice
buf.Read(byteBuf)
ds[rowNo] = byteBuf
rs[rowNo] = rowNo
return true, nil
})
// Sort values: one bucket pass per key byte, from byte 0 up to rowSize-1
valueBins := make([][][]byte, 256)
rowBins := make([][]int, 256)
for i := 0; i < rowSize; i++ {
for j := 0; j < len(ds); j++ {
// Address each row value by its ith byte
b := ds[j]
valueBins[b[i]] = append(valueBins[b[i]], b)
rowBins[b[i]] = append(rowBins[b[i]], rs[j])
}
// Gather the buckets back in byte-value order, then empty them
// (keeping their backing storage) for the next pass.
j := 0
for k := 0; k < 256; k++ {
bs := valueBins[k]
rc := rowBins[k]
copy(ds[j:], bs)
copy(rs[j:], rc)
j += len(bs)
valueBins[k] = bs[:0]
rowBins[k] = rc[:0]
}
}
// rs now lists original row numbers in sorted order. Walk each cycle of
// that permutation once, emitting a swap for every step that does not
// close the cycle.
done := make([]bool, rows)
for index := range rs {
if done[index] {
continue
}
j := index
for {
done[j] = true
if rs[j] != index {
ret = append(ret, sortSpec{j, rs[j]})
j = rs[j]
} else {
break
}
}
}
return ret
}
// Sort reorders the rows of inst in place so that they are sorted on attrs,
// in the requested direction (Ascending or Descending), and returns the
// reordered instances.
//
// Only *DenseInstances is supported: any other FixedDataGrid causes a panic.
// The returned error is always nil.
//
// IMPORTANT: the ordering of rows with respect to anything other than the
// attributes used for sorting is arbitrary.
func Sort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error) {
	swaps := createSortSpec(inst, attrs)

	dense, isDense := inst.(*DenseInstances)
	if !isDense {
		panic("Sort is not supported for this yet!")
	}

	// Applying the swaps yields ascending order.
	for _, s := range swaps {
		dense.swapRows(s.r1, s.r2)
	}

	if direction == Descending {
		// Descending order is the ascending result, reversed.
		_, rows := inst.Size()
		for lo, hi := 0, rows-1; lo < hi; lo, hi = lo+1, hi-1 {
			dense.swapRows(lo, hi)
		}
	}

	return dense, nil
}
// LazySort sorts like Sort, but leaves inst untouched: it returns an
// InstancesView whose row mapping makes the rows appear reordered.
// The returned error is always nil.
// See also: Sort
func LazySort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error) {
	swaps := createSortSpec(inst, attrs)

	// Start from the identity ordering and replay the swaps on it.
	_, rows := inst.Size()
	order := make([]int, rows)
	for pos := range order {
		order[pos] = pos
	}
	for _, s := range swaps {
		order[s.r1], order[s.r2] = order[s.r2], order[s.r1]
	}

	if direction == Descending {
		for lo, hi := 0, rows-1; lo < hi; lo, hi = lo+1, hi-1 {
			order[lo], order[hi] = order[hi], order[lo]
		}
	}

	// Only rows that actually moved need an entry in the mapping.
	rowMap := make(map[int]int)
	for pos, srcRow := range order {
		if pos != srcRow {
			rowMap[pos] = srcRow
		}
	}

	return NewInstancesViewFromRows(inst, rowMap), nil
}
================================================
FILE: base/sort_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// isSortedAsc reports whether the float values of attr never decrease
// from one row of inst to the next.
func isSortedAsc(inst FixedDataGrid, attr AttributeSpec) bool {
	_, rows := inst.Size()
	previous := 0.0
	for row := 0; row < rows; row++ {
		current := UnpackBytesToFloat(inst.Get(attr, row))
		// The first row has nothing to be compared against.
		if row > 0 && previous > current {
			return false
		}
		previous = current
	}
	return true
}
// isSortedDesc reports whether the float values of attr never increase
// from one row of inst to the next.
func isSortedDesc(inst FixedDataGrid, attr AttributeSpec) bool {
	_, rows := inst.Size()
	previous := 0.0
	for row := 0; row < rows; row++ {
		current := UnpackBytesToFloat(inst.Get(attr, row))
		// The first row has nothing to be compared against.
		if row > 0 && previous < current {
			return false
		}
		previous = current
	}
	return true
}
// TestSortDesc sorts the iris dataset in descending order on every
// attribute but the last and compares the result against a pre-sorted
// reference file.
func TestSortDesc(t *testing.T) {
	Convey("Given data that's not already sorted descending", t, func() {
		unsorted, loadErr := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(loadErr, ShouldBeNil)

		specs := ResolveAllAttributes(unsorted)
		So(isSortedDesc(unsorted, specs[0]), ShouldBeFalse)

		Convey("Given reference data that's alredy sorted descending", func() {
			reference, refErr := ParseCSVToInstances("../examples/datasets/iris_sorted_desc.csv", true)
			So(refErr, ShouldBeNil)

			refSpecs := ResolveAllAttributes(reference)
			So(isSortedDesc(reference, refSpecs[0]), ShouldBeTrue)

			Convey("Sorting Descending", func() {
				// Sort on everything except the final attribute.
				sortKeys := specs[0 : len(specs)-1]
				result, sortErr := Sort(unsorted, Descending, sortKeys)
				So(sortErr, ShouldBeNil)

				Convey("Result should be sorted descending", func() {
					So(isSortedDesc(result, specs[0]), ShouldBeTrue)
				})
				Convey("Result should match the reference", func() {
					So(InstancesAreEqual(reference, result), ShouldBeTrue)
				})
			})
		})
	})
}
// TestSortAsc sorts the iris dataset in ascending order on every attribute
// but the last, and checks the result against a pre-sorted reference file
// and a known first row.
func TestSortAsc(t *testing.T) {
	Convey("Given data that's not already sorted ascending", t, func() {
		unsorted, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		as1 := ResolveAllAttributes(unsorted)
		So(isSortedAsc(unsorted, as1[0]), ShouldBeFalse)

		Convey("Given reference data that's alredy sorted ascending", func() {
			sortedAscending, err := ParseCSVToInstances("../examples/datasets/iris_sorted_asc.csv", true)
			So(err, ShouldBeNil)

			as2 := ResolveAllAttributes(sortedAscending)
			So(isSortedAsc(sortedAscending, as2[0]), ShouldBeTrue)

			Convey("Sorting Ascending", func() {
				// Sort on everything except the final attribute.
				result, err := Sort(unsorted, Ascending, as1[0:len(as1)-1])
				So(err, ShouldBeNil)

				// This label previously read "descending", which contradicted
				// the isSortedAsc assertion it wraps.
				Convey("Result should be sorted ascending", func() {
					So(isSortedAsc(result, as1[0]), ShouldBeTrue)
				})
				Convey("Result should match the reference", func() {
					So(InstancesAreEqual(sortedAscending, result), ShouldBeTrue)
				})
				Convey("First element of Result should equal known value", func() {
					So(result.RowString(0), ShouldEqual, "4.3 3.0 1.1 0.1 Iris-setosa")
				})
			})
		})
	})
}
================================================
FILE: base/spec.go
================================================
package base
import (
"fmt"
)
// AttributeSpec is a pointer to a particular Attribute
// within a particular Instance structure and encodes position
// and storage information associated with that Attribute.
type AttributeSpec struct {
// pond identifies the attribute group holding the Attribute (it is the
// key used to look up a group name in DenseInstances.agRevMap).
pond int
// position locates the Attribute; byPosition orders specs by pond
// first and position second.
// NOTE(review): presumably an index within the pond - confirm.
position int
// attr is the Attribute this spec refers to.
attr Attribute
}
// byPosition is a slice of AttributeSpecs which, together with its Less
// method, can be handed to sort.Sort.
type byPosition []AttributeSpec

// Len returns the number of AttributeSpecs in the slice.
func (b byPosition) Len() int {
	count := len(b)
	return count
}

// Swap exchanges the AttributeSpecs at indices i and j.
func (b byPosition) Swap(i, j int) {
	held := b[i]
	b[i] = b[j]
	b[j] = held
}
// Less reports whether the AttributeSpec at index i sorts before the one at
// index j. Specs are ordered by pond first and by position second: both are
// folded into a single uint64 key with pond in the upper 32 bits.
//
// The key for j used to be computed from b[i], which made Less always
// return false and left sort.Sort unable to order anything.
func (b byPosition) Less(i, j int) bool {
	iPos := (uint64(b[i].pond) << 32) + uint64(b[i].position)
	jPos := (uint64(b[j].pond) << 32) + uint64(b[j].position)
	return iPos < jPos
}
// GetAttribute returns the Attribute which this AttributeSpec
// refers to.
func (a *AttributeSpec) GetAttribute() Attribute {
return a.attr
}
// String returns a human-readable description of this AttributeSpec:
// the Attribute itself followed by its pond and position, formatted
// as "pond/position".
func (a *AttributeSpec) String() string {
return fmt.Sprintf("AttributeSpec(Attribute: '%s', Pond: %d/%d)", a.attr, a.pond, a.position)
}
================================================
FILE: base/util.go
================================================
package base
import (
"math"
"unsafe"
)
// PackU64ToBytesInline writes val into the first 8 bytes of ret, least
// significant byte first. ret must have length at least 8; a shorter slice
// causes a panic before anything is written.
func PackU64ToBytesInline(val uint64, ret []byte) {
	_ = ret[7] // reject a too-short ret up front, before touching any byte
	for i := uint(0); i < 8; i++ {
		ret[i] = byte(val >> (8 * i))
	}
}
// PackFloatToBytesInline fills ret with the byte values of
// the float64 argument. ret must be at least 8 bytes in size.
// The float's IEEE 754 bit pattern (math.Float64bits) is what gets
// packed, via PackU64ToBytesInline.
func PackFloatToBytesInline(val float64, ret []byte) {
PackU64ToBytesInline(math.Float64bits(val), ret)
}
// PackU64ToBytes returns a freshly allocated 8-byte slice holding val,
// least significant byte first.
func PackU64ToBytes(val uint64) []byte {
	packed := make([]byte, 8)
	for i := range packed {
		packed[i] = byte(val >> (8 * uint(i)))
	}
	return packed
}
// UnpackBytesToU64 converts the first 8 bytes of a given byte slice into
// a uint64 value. Bytes are interpreted least significant first, which is
// the layout PackU64ToBytes produces.
//
// val must hold at least 8 bytes; a shorter slice causes a panic. The
// previous pointer cast read 8 bytes regardless of the slice length and
// decoded them in host byte order.
func UnpackBytesToU64(val []byte) uint64 {
	_ = val[7] // single bounds check covering every access below
	return uint64(val[0]) |
		uint64(val[1])<<8 |
		uint64(val[2])<<16 |
		uint64(val[3])<<24 |
		uint64(val[4])<<32 |
		uint64(val[5])<<40 |
		uint64(val[6])<<48 |
		uint64(val[7])<<56
}
// PackFloatToBytes returns a 8-byte slice containing
// the byte values of a float64: its IEEE 754 bit pattern
// (math.Float64bits), packed by PackU64ToBytes.
func PackFloatToBytes(val float64) []byte {
return PackU64ToBytes(math.Float64bits(val))
}
// UnpackBytesToFloat reinterprets the memory starting at the first byte
// of val as a float64 and returns that value.
//
// val must not be empty.
// NOTE(review): 8 bytes are read whatever len(val) is, and they are
// interpreted in host byte order - callers are presumably expected to
// pass an 8-byte slice produced by PackFloatToBytes; confirm.
func UnpackBytesToFloat(val []byte) float64 {
	return *(*float64)(unsafe.Pointer(&val[0]))
}
// byteSeqEqual reports whether a and b have the same length and hold
// the same bytes in the same order.
func byteSeqEqual(a, b []byte) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
================================================
FILE: base/util_attributes.go
================================================
package base
import (
"fmt"
"sort"
)
// This file contains utility functions relating to Attributes and Attribute specifications.
// NonClassFloatAttributes returns every *FloatAttribute of d which does
// not equal any of d's class Attributes. The result keeps the order of
// d.AllAttributes() and is never nil.
func NonClassFloatAttributes(d DataGrid) []Attribute {
	classAttrs := d.AllClassAttributes()
	allAttrs := d.AllAttributes()
	ret := make([]Attribute, 0)

candidates:
	for _, candidate := range allAttrs {
		if _, isFloat := candidate.(*FloatAttribute); !isFloat {
			continue
		}
		for _, classAttr := range classAttrs {
			if candidate.Equals(classAttr) {
				// It's a class Attribute: skip it.
				continue candidates
			}
		}
		ret = append(ret, candidate)
	}
	return ret
}
// NonClassAttributes returns all Attributes which aren't designated as a
// class Attribute.
//
// IMPORTANT: the result comes from AttributeDifferenceReferences, so it is
// not guaranteed to be ordered.
func NonClassAttributes(d DataGrid) []Attribute {
classAttrs := d.AllClassAttributes()
allAttrs := d.AllAttributes()
return AttributeDifferenceReferences(allAttrs, classAttrs)
}
// ResolveAttributes looks up the AttributeSpec of each Attribute in attrs
// within d and returns the specs after passing them through
// sort.Sort(byPosition(...)).
//
// IMPORTANT: panics if any Attribute cannot be resolved by d.
func ResolveAttributes(d DataGrid, attrs []Attribute) []AttributeSpec {
	specs := make([]AttributeSpec, 0, len(attrs))
	for _, attr := range attrs {
		spec, err := d.GetAttribute(attr)
		if err != nil {
			panic(fmt.Errorf("Error resolving Attribute %s: %s", attr, err))
		}
		specs = append(specs, spec)
	}
	sort.Sort(byPosition(specs))
	return specs
}
// ResolveAllAttributes returns the AttributeSpec of every Attribute
// reported by d.AllAttributes(), as resolved by ResolveAttributes.
func ResolveAllAttributes(d DataGrid) []AttributeSpec {
return ResolveAttributes(d, d.AllAttributes())
}
// buildAttrSet returns a set (a map whose values are all true) containing
// every Attribute in a. Attributes are keyed by their interface value.
func buildAttrSet(a []Attribute) map[Attribute]bool {
	set := make(map[Attribute]bool)
	for i := range a {
		set[a[i]] = true
	}
	return set
}
// AttributeIntersect returns the intersection of two Attribute slices,
// using Equals to compare Attributes.
//
// IMPORTANT: result is ordered in order of the first []Attribute argument.
//
// IMPORTANT: result contains only Attributes from a1.
func AttributeIntersect(a1, a2 []Attribute) []Attribute {
	common := make([]Attribute, 0)
	for _, candidate := range a1 {
		for _, other := range a2 {
			if candidate.Equals(other) {
				// One match is enough; keep it and move on.
				common = append(common, candidate)
				break
			}
		}
	}
	return common
}
// AttributeIntersectReferences returns the intersection of two Attribute
// slices. Attributes present more than once in a1 appear once in the result.
//
// IMPORTANT: result is not guaranteed to be ordered.
//
// IMPORTANT: done using pointers for speed, use AttributeIntersect
// if the Attributes originate from different DataGrids.
func AttributeIntersectReferences(a1, a2 []Attribute) []Attribute {
	first, second := buildAttrSet(a1), buildAttrSet(a2)
	shared := make([]Attribute, 0)
	for attr := range first {
		// Sets only ever hold true, so a plain lookup is a membership test.
		if second[attr] {
			shared = append(shared, attr)
		}
	}
	return shared
}
// AttributeDifference returns the difference between two Attribute
// slices, i.e. every value of a1 for which no value of a2 is Equal.
//
// IMPORTANT: result is ordered the same as a1.
//
// IMPORTANT: result only contains values from a1.
func AttributeDifference(a1, a2 []Attribute) []Attribute {
	remaining := make([]Attribute, 0)

candidates:
	for _, candidate := range a1 {
		for _, excluded := range a2 {
			if candidate.Equals(excluded) {
				continue candidates
			}
		}
		remaining = append(remaining, candidate)
	}
	return remaining
}
// AttributeDifferenceReferences returns the difference between two
// Attribute slices, i.e. all the values in a1 which do not occur in a2.
// Attributes present more than once in a1 appear once in the result.
//
// IMPORTANT: result is not guaranteed to be ordered.
//
// IMPORTANT: done using pointers for speed, use AttributeDifference
// if the Attributes originate from different DataGrids.
func AttributeDifferenceReferences(a1, a2 []Attribute) []Attribute {
	first, second := buildAttrSet(a1), buildAttrSet(a2)
	remaining := make([]Attribute, 0)
	for attr := range first {
		// Sets only ever hold true, so a plain lookup is a membership test.
		if !second[attr] {
			remaining = append(remaining, attr)
		}
	}
	return remaining
}
================================================
FILE: base/util_instances.go
================================================
package base
import (
"fmt"
"math/rand"
)
// This file contains utility functions relating to efficiently
// generating predictions and instantiating DataGrid implementations.
// GeneratePredictionVector builds a new DenseInstances holding only the
// class Attributes of from (each registered as a class Attribute) and
// extended to the same number of rows, ready to receive predictions.
func GeneratePredictionVector(from FixedDataGrid) UpdatableDataGrid {
	classAttrs := from.AllClassAttributes()
	_, rowCount := from.Size()

	predictions := NewDenseInstances()
	for i := range classAttrs {
		predictions.AddAttribute(classAttrs[i])
		predictions.AddClassAttribute(classAttrs[i])
	}
	predictions.Extend(rowCount)
	return predictions
}
// CopyDenseInstances returns a new DenseInstances with the same attribute
// groups as template, populated with the Attributes in templateAttrs.
// Each Attribute is placed in the group it occupies in template.
//
// IMPORTANT: panics if an Attribute cannot be resolved in template, if its
// group cannot be found, or if it cannot be added to the new instances.
func CopyDenseInstances(template *DenseInstances, templateAttrs []Attribute) *DenseInstances {
	instances := NewDenseInstances()

	// Recreate every attribute group: size 0 for binary groups, 8 otherwise.
	for name, group := range template.AllAttributeGroups() {
		if _, isBinary := group.(*BinaryAttributeGroup); isBinary {
			instances.CreateAttributeGroup(name, 0)
			continue
		}
		instances.CreateAttributeGroup(name, 8)
	}

	for _, attr := range templateAttrs {
		spec, err := template.GetAttribute(attr)
		if err != nil {
			panic(err)
		}
		groupName, known := template.agRevMap[spec.pond]
		if !known {
			panic(groupName)
		}
		if _, err := instances.AddAttributeToAttributeGroup(attr, groupName); err != nil {
			panic(err)
		}
	}
	return instances
}
// GetClass returns the string value of the class Attribute on a given row.
//
// IMPORTANT: GetClass will panic if the number of class Attributes is
// anything other than one, if the class Attribute cannot be resolved, or
// if the class value on that row is missing.
func GetClass(from DataGrid, row int) string {
	classAttrs := from.AllClassAttributes()
	switch n := len(classAttrs); {
	case n > 1:
		panic("More than one class defined")
	case n == 0:
		panic("No class defined!")
	}
	classAttr := classAttrs[0]

	spec, err := from.GetAttribute(classAttr)
	if err != nil {
		panic(fmt.Errorf("Can't resolve class Attribute %s", err))
	}

	raw := from.Get(spec, row)
	if raw == nil {
		panic("Class values shouldn't be missing")
	}
	return classAttr.GetStringFromSysVal(raw)
}
// SetClass stores class, converted to the class Attribute's system
// representation, as the class value of the given row.
//
// IMPORTANT: SetClass will panic if the number of class Attributes
// is anything other than one, or if the class Attribute cannot be resolved.
func SetClass(at UpdatableDataGrid, row int, class string) {
	classAttrs := at.AllClassAttributes()
	switch n := len(classAttrs); {
	case n > 1:
		panic("More than one class defined")
	case n == 0:
		panic("No class Attributes are defined")
	}
	classAttr := classAttrs[0]

	spec, err := at.GetAttribute(classAttr)
	if err != nil {
		panic(fmt.Errorf("Can't resolve class Attribute %s", err))
	}

	at.Set(spec, row, classAttr.GetSysValFromString(class))
}
// GetAttributeByName returns the first Attribute of inst whose name equals
// name, or nil when there is no such Attribute.
func GetAttributeByName(inst FixedDataGrid, name string) Attribute {
	attrs := inst.AllAttributes()
	for i := range attrs {
		if attrs[i].GetName() == name {
			return attrs[i]
		}
	}
	return nil
}
// GetClassDistributionByBinaryFloatValue counts the rows of inst by the
// value of its single float class Attribute: index 1 of the result counts
// rows whose class value is greater than 0.5, index 0 counts all others.
//
// IMPORTANT: panics unless inst has exactly one class Attribute and that
// Attribute is a *FloatAttribute.
func GetClassDistributionByBinaryFloatValue(inst FixedDataGrid) []int {
	// Get the class variable
	attrs := inst.AllClassAttributes()
	if len(attrs) != 1 {
		panic(fmt.Errorf("Wrong number of class variables (has %d, should be 1)", len(attrs)))
	}
	if _, ok := attrs[0].(*FloatAttribute); !ok {
		panic(fmt.Errorf("Class Attribute must be FloatAttribute (is %s)", attrs[0]))
	}

	// One counter per side of the 0.5 threshold
	ret := make([]int, 2)

	// Map through everything
	specs := ResolveAttributes(inst, attrs)
	inst.MapOverRows(specs, func(vals [][]byte, row int) (bool, error) {
		if UnpackBytesToFloat(vals[0]) > 0.5 {
			ret[1]++
		} else {
			ret[0]++
		}
		// Return true like the other MapOverRows callbacks in this package
		// which need to see every row; this callback used to return false.
		// NOTE(review): assumes the bool means "keep iterating" - confirm
		// against MapOverRows.
		return true, nil
	})
	return ret
}
// GetClassDistributionByCategoricalValue returns a vector containing
// the count of each class value (indexed by the class' system
// integer representation). The vector has one entry per value of the
// class Attribute.
//
// IMPORTANT: panics unless inst has exactly one class Attribute and that
// Attribute is a *CategoricalAttribute.
func GetClassDistributionByCategoricalValue(inst FixedDataGrid) []int {
	// Get the class variable
	attrs := inst.AllClassAttributes()
	if len(attrs) != 1 {
		panic(fmt.Errorf("Wrong number of class variables (has %d, should be 1)", len(attrs)))
	}
	classAttr, ok := attrs[0].(*CategoricalAttribute)
	if !ok {
		panic(fmt.Errorf("Class Attribute must be a CategoricalAttribute (is %s)", attrs[0]))
	}

	// One counter per class value
	ret := make([]int, len(classAttr.GetValues()))

	// Map through everything
	specs := ResolveAttributes(inst, attrs)
	inst.MapOverRows(specs, func(vals [][]byte, row int) (bool, error) {
		index := UnpackBytesToU64(vals[0])
		ret[int(index)]++
		// Return true like the other MapOverRows callbacks in this package
		// which need to see every row; this callback used to return false.
		// NOTE(review): assumes the bool means "keep iterating" - confirm
		// against MapOverRows.
		return true, nil
	})
	return ret
}
// GetClassDistribution counts the rows of inst per class, keyed by the
// string representation of the class value (see GetClass).
func GetClassDistribution(inst FixedDataGrid) map[string]int {
	counts := make(map[string]int)
	_, rows := inst.Size()
	for row := 0; row < rows; row++ {
		counts[GetClass(inst, row)]++
	}
	return counts
}
// GetClassDistributionAfterThreshold returns the class distribution
// after a speculative split on a given Attribute using a threshold.
//
// The outer key is "1" for rows whose value of at is greater than val and
// "0" for all other rows; the inner map counts those rows per class string.
//
// IMPORTANT: panics if at cannot be resolved in inst or is not a
// *FloatAttribute.
func GetClassDistributionAfterThreshold(inst FixedDataGrid, at Attribute, val float64) map[string]map[string]int {
	ret := make(map[string]map[string]int)

	// Find the attribute we're decomposing on
	attrSpec, err := inst.GetAttribute(at)
	if err != nil {
		panic(fmt.Sprintf("Invalid attribute %s (%s)", at, err))
	}

	// Validate
	if _, ok := at.(*FloatAttribute); !ok {
		panic("Must be numeric!")
	}

	_, rows := inst.Size()
	for i := 0; i < rows; i++ {
		splitVar := "0"
		if UnpackBytesToFloat(inst.Get(attrSpec, i)) > val {
			splitVar = "1"
		}
		classVar := GetClass(inst, i)

		// Create the inner map on first use, rather than rewinding the
		// loop counter and processing the row a second time.
		counts, ok := ret[splitVar]
		if !ok {
			counts = make(map[string]int)
			ret[splitVar] = counts
		}
		counts[classVar]++
	}
	return ret
}
// GetClassDistributionAfterSplit returns the class distribution
// after a speculative split on a given Attribute.
//
// The outer key is the string value of at on a row; the inner map counts
// the rows sharing that value per class string.
//
// IMPORTANT: panics if at cannot be resolved in inst.
func GetClassDistributionAfterSplit(inst FixedDataGrid, at Attribute) map[string]map[string]int {
	ret := make(map[string]map[string]int)

	// Find the attribute we're decomposing on
	attrSpec, err := inst.GetAttribute(at)
	if err != nil {
		panic(fmt.Sprintf("Invalid attribute %s (%s)", at, err))
	}

	_, rows := inst.Size()
	for i := 0; i < rows; i++ {
		splitVar := at.GetStringFromSysVal(inst.Get(attrSpec, i))
		classVar := GetClass(inst, i)

		// Create the inner map on first use, rather than rewinding the
		// loop counter and processing the row a second time.
		counts, ok := ret[splitVar]
		if !ok {
			counts = make(map[string]int)
			ret[splitVar] = counts
		}
		counts[classVar]++
	}
	return ret
}
// DecomposeOnNumericAttributeThreshold splits inst in two according to the
// value of the numeric Attribute at. Rows whose value is greater than val
// are collected under key "1", all other rows under key "0"; a key is only
// present if at least one row falls on that side. Each entry is a view of
// inst restricted to those rows and to every Attribute except at.
//
// IMPORTANT: calls panic() if the AttributeSpec of at cannot be determined, or if
// the Attribute is not numeric.
func DecomposeOnNumericAttributeThreshold(inst FixedDataGrid, at Attribute, val float64) map[string]FixedDataGrid {
	if _, isFloat := at.(*FloatAttribute); !isFloat {
		panic("Invalid argument")
	}

	attrSpec, err := inst.GetAttribute(at)
	if err != nil {
		panic(fmt.Sprintf("Invalid Attribute index %s", at))
	}

	// Every Attribute but the one being split on survives in the children.
	remaining := make([]Attribute, 0)
	for _, a := range inst.AllAttributes() {
		if !a.Equals(at) {
			remaining = append(remaining, a)
		}
	}

	// The split Attribute goes last so the callback can find it easily.
	specs := append(ResolveAttributes(inst, remaining), attrSpec)

	rowsBySide := make(map[string][]int)
	inst.MapOverRows(specs, func(row [][]byte, rowNo int) (bool, error) {
		side := "0"
		if UnpackBytesToFloat(row[len(row)-1]) > val {
			side = "1"
		}
		rowsBySide[side] = append(rowsBySide[side], rowNo)
		return true, nil
	})

	ret := make(map[string]FixedDataGrid)
	for side, rows := range rowsBySide {
		ret[side] = NewInstancesViewFromVisible(inst, rows, remaining)
	}
	return ret
}
// DecomposeOnAttributeValues groups the rows of inst by the string value
// of the Attribute at. The result maps each value that occurs to a view of
// inst restricted to the rows carrying it and to every Attribute except at.
//
// IMPORTANT: calls panic() if the AttributeSpec of at cannot be determined.
func DecomposeOnAttributeValues(inst FixedDataGrid, at Attribute) map[string]FixedDataGrid {
	attrSpec, err := inst.GetAttribute(at)
	if err != nil {
		panic(fmt.Sprintf("Invalid Attribute index %s", at))
	}

	// Every Attribute but the one being split on survives in the children.
	remaining := make([]Attribute, 0)
	for _, a := range inst.AllAttributes() {
		if !a.Equals(at) {
			remaining = append(remaining, a)
		}
	}

	// The split Attribute goes last so the callback can find it easily.
	specs := append(ResolveAttributes(inst, remaining), attrSpec)
	splitAttr := attrSpec.attr

	rowsByValue := make(map[string][]int)
	inst.MapOverRows(specs, func(row [][]byte, rowNo int) (bool, error) {
		value := splitAttr.GetStringFromSysVal(row[len(row)-1])
		rowsByValue[value] = append(rowsByValue[value], rowNo)
		return true, nil
	})

	ret := make(map[string]FixedDataGrid)
	for value, rows := range rowsByValue {
		ret[value] = NewInstancesViewFromVisible(inst, rows, remaining)
	}
	return ret
}
// InstancesTrainTestSplit shuffles src (see Shuffle) and then randomly
// assigns each row to one of two views over it. For every row a number in
// [0, 100] is drawn: rows whose draw exceeds 100*prop go to the first
// returned grid, all other rows go to the second.
//
// IMPORTANT: this function is only meaningful when prop is between 0.0 and 1.0.
// Using any other values may result in odd behaviour.
func InstancesTrainTestSplit(src FixedDataGrid, prop float64) (FixedDataGrid, FixedDataGrid) {
	src = Shuffle(src)

	var (
		trainingRows = make([]int, 0)
		testingRows  = make([]int, 0)
		cutoff       = int(100 * prop)
	)

	_, rows := src.Size()
	for row := 0; row < rows; row++ {
		if rand.Intn(101) > cutoff {
			trainingRows = append(trainingRows, row)
			continue
		}
		testingRows = append(testingRows, row)
	}

	allAttrs := src.AllAttributes()
	training := NewInstancesViewFromVisible(src, trainingRows, allAttrs)
	testing := NewInstancesViewFromVisible(src, testingRows, allAttrs)
	return training, testing
}
// LazyShuffle randomizes the row order without re-ordering the rows
// via an InstancesView.
//
// The view's row mapping is a random permutation of the rows of from, so
// every source row appears exactly once. The previous implementation wrote
// rowMap[i] = j and rowMap[j] = i directly, which overwrote earlier entries
// and could make one source row appear several times while hiding others.
func LazyShuffle(from FixedDataGrid) FixedDataGrid {
	_, rows := from.Size()

	// Shuffle an explicit index slice, swapping values rather than
	// overwriting them, with the same draws Shuffle uses.
	order := make([]int, rows)
	for i := range order {
		order[i] = i
	}
	for i := 0; i < rows; i++ {
		j := rand.Intn(i + 1)
		order[i], order[j] = order[j], order[i]
	}

	// Key: row as seen through the view. Value: row in from.
	rowMap := make(map[int]int, rows)
	for viewRow, srcRow := range order {
		rowMap[viewRow] = srcRow
	}
	return NewInstancesViewFromRows(from, rowMap)
}
// Shuffle randomizes the row order. *DenseInstances are shuffled in place
// by swapping rows and the same instance is returned; any other
// FixedDataGrid is handed to LazyShuffle.
func Shuffle(from FixedDataGrid) FixedDataGrid {
	_, rows := from.Size()

	dense, isDense := from.(*DenseInstances)
	if !isDense {
		return LazyShuffle(from)
	}

	for i := 0; i < rows; i++ {
		dense.swapRows(i, rand.Intn(i+1))
	}
	return dense
}
// SampleWithReplacement returns a view over from in which each of the
// first size rows is mapped to a randomly chosen row of from. Rows are
// drawn independently, so the same source row may be picked repeatedly.
//
// IMPORTANT: There's a high chance of seeing duplicate rows
// whenever size is close to the row count.
func SampleWithReplacement(from FixedDataGrid, size int) FixedDataGrid {
	_, rows := from.Size()
	picks := make(map[int]int)
	for viewRow := 0; viewRow < size; viewRow++ {
		picks[viewRow] = rand.Intn(rows)
	}
	return NewInstancesViewFromRows(from, picks)
}
// CheckCompatible checks whether two DataGrids have the same Attributes.
// It returns their intersection (taken from s1, in s1's order) when that
// intersection is as long as both Attribute lists, and nil otherwise.
func CheckCompatible(s1 FixedDataGrid, s2 FixedDataGrid) []Attribute {
	s1Attrs := s1.AllAttributes()
	s2Attrs := s2.AllAttributes()
	shared := AttributeIntersect(s1Attrs, s2Attrs)

	if n := len(shared); n != len(s1Attrs) || n != len(s2Attrs) {
		return nil
	}
	return shared
}
// CheckStrictlyCompatible checks whether two DenseInstances have
// AttributeGroups with the same Attributes, in the same order,
// enabling optimisations.
//
// It returns false if either argument is not a *DenseInstances, if the two
// do not have the same set of AttributeGroups, or if any pair of
// corresponding groups differs in the number, order or equality of its
// Attributes.
func CheckStrictlyCompatible(s1 FixedDataGrid, s2 FixedDataGrid) bool {
	// Cast
	d1, ok1 := s1.(*DenseInstances)
	d2, ok2 := s2.(*DenseInstances)
	if !ok1 || !ok2 {
		return false
	}

	// Retrieve AttributeGroups
	d1ags := d1.AllAttributeGroups()
	d2ags := d2.AllAttributeGroups()

	// Check everything in d1 is in d2
	for a := range d1ags {
		if _, ok := d2ags[a]; !ok {
			return false
		}
	}

	// Check everything in d2 is in d1
	for a := range d2ags {
		if _, ok := d1ags[a]; !ok {
			return false
		}
	}

	// Check that everything has the same number
	// of equivalent Attributes, in the same order
	for a := range d1ags {
		a1 := d1ags[a].Attributes()
		a2 := d2ags[a].Attributes()
		// Without this check a shorter a2 would be indexed out of range
		// below, and extra Attributes in a2 would go unnoticed.
		if len(a1) != len(a2) {
			return false
		}
		for i := range a1 {
			if !a1[i].Equals(a2[i]) {
				return false
			}
		}
	}

	return true
}
// InstancesAreEqual checks whether a given Instance set is exactly
// the same as another (i.e. has the same size and values).
//
// Every Attribute of inst must resolve in other to an equal Attribute, and
// the raw bytes stored for it must match on every row.
//
// Both the AttributeSpec and the row bytes on the "other" side used to be
// read from inst, so other was never examined and the result was always
// true.
//
// IMPORTANT: panics if inst cannot resolve one of its own Attributes.
func InstancesAreEqual(inst, other FixedDataGrid) bool {
	instDim, rows := inst.Size()
	otherDim, otherRows := other.Size()
	if instDim != otherDim || rows != otherRows {
		return false
	}

	for _, a := range inst.AllAttributes() {
		as1, err := inst.GetAttribute(a)
		if err != nil {
			panic(err) // That indicates some kind of error
		}
		as2, err := other.GetAttribute(a)
		if err != nil {
			return false // Obviously has different Attributes
		}
		if !as1.GetAttribute().Equals(as2.GetAttribute()) {
			return false
		}
		for i := 0; i < rows; i++ {
			b1 := inst.Get(as1, i)
			b2 := other.Get(as2, i)
			if !byteSeqEqual(b1, b2) {
				return false
			}
		}
	}
	return true
}
================================================
FILE: base/util_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestClassDistributionAfterSplit checks the per-value class counts
// obtained by splitting the PlayTennis dataset on its first Attribute.
func TestClassDistributionAfterSplit(t *testing.T) {
	Convey("Given the PlayTennis dataset", t, func() {
		inst, err := ParseCSVToInstances("../examples/datasets/tennis.csv", true)
		So(err, ShouldEqual, nil)

		Convey("Splitting on Sunny should give the right result...", func() {
			result := GetClassDistributionAfterSplit(inst, inst.AllAttributes()[0])

			expectations := []struct {
				value string
				class string
				count int
			}{
				{"sunny", "no", 3},
				{"sunny", "yes", 2},
				{"overcast", "yes", 4},
				{"rainy", "yes", 3},
				{"rainy", "no", 2},
			}
			for _, e := range expectations {
				So(result[e.value][e.class], ShouldEqual, e.count)
			}
		})
	})
}
// TestPackAndUnpack checks that a uint64 survives a round trip through
// PackU64ToBytes and UnpackBytesToU64.
func TestPackAndUnpack(t *testing.T) {
	cases := []struct {
		description string
		value       uint64
	}{
		{"Given some uint64", uint64(0xDEADBEEF)},
		{"Given another uint64", uint64(1)},
	}

	for _, c := range cases {
		x := c.value
		Convey(c.description, t, func() {
			Convey("When the integer is packed", func() {
				packed := PackU64ToBytes(x)
				Convey("And then unpacked", func() {
					unpacked := UnpackBytesToU64(packed)
					Convey("The unpacked version should be the same", func() {
						So(x, ShouldEqual, unpacked)
					})
				})
			})
		})
	}
}
// TestPackAndUnpackFloat checks that a float64 survives a round trip
// through PackFloatToBytes and UnpackBytesToFloat.
func TestPackAndUnpackFloat(t *testing.T) {
	Convey("Given some float", t, func() {
		const original = 1.2011
		x := float64(original)

		Convey("When the float gets packed", func() {
			encoded := PackFloatToBytes(x)

			Convey("And then unpacked", func() {
				decoded := UnpackBytesToFloat(encoded)

				Convey("The unpacked version should be the same", func() {
					So(decoded, ShouldEqual, x)
				})
			})
		})
	})
}
// TestStrictlyCompatable exercises CheckStrictlyCompatible on pairs of
// datasets which should and should not be strictly compatible.
func TestStrictlyCompatable(t *testing.T) {
	Convey("Given two datasets...", t, func() {
		Convey("Given two identical datasets", func() {
			// exam.csv and exams.csv are expected to be strictly compatible
			left, err := ParseCSVToInstances("../examples/datasets/exam.csv", true)
			So(err, ShouldEqual, nil)
			right, err := ParseCSVToInstances("../examples/datasets/exams.csv", true)
			So(err, ShouldEqual, nil)
			So(CheckStrictlyCompatible(left, right), ShouldEqual, true)
		})
		Convey("Given two identical datasets (apart from sorting)", func() {
			// Violates the requirement that both CategoricalAttributes
			// must have values in the same order
			ascending, err := ParseCSVToInstances("../examples/datasets/iris_sorted_asc.csv", true)
			So(err, ShouldEqual, nil)
			descending, err := ParseCSVToInstances("../examples/datasets/iris_sorted_desc.csv", true)
			So(err, ShouldEqual, nil)
			So(CheckStrictlyCompatible(ascending, descending), ShouldEqual, false)
		})
		Convey("Given two different datasets...", func() {
			// Violates everything
			tennis, err := ParseCSVToInstances("../examples/datasets/tennis.csv", true)
			So(err, ShouldEqual, nil)
			iris, err := ParseCSVToInstances("../examples/datasets/iris_sorted_desc.csv", true)
			So(err, ShouldEqual, nil)
			So(CheckStrictlyCompatible(tennis, iris), ShouldEqual, false)
		})
	})
}
// TestCategoricalEquality checks that the class Attributes of the ascending
// and descending Iris files share a name but do not compare equal, in either
// direction.
func TestCategoricalEquality(t *testing.T) {
	Convey("Given two outwardly identical class Attributes...", t, func() {
		ascending, err := ParseCSVToInstances("../examples/datasets/iris_sorted_asc.csv", true)
		So(err, ShouldEqual, nil)
		descending, err := ParseCSVToInstances("../examples/datasets/iris_sorted_desc.csv", true)
		So(err, ShouldEqual, nil)

		ascClass := ascending.AllClassAttributes()[0]
		descClass := descending.AllClassAttributes()[0]

		So(ascClass.GetName(), ShouldEqual, descClass.GetName())
		// Presumably the two files introduce the category values in a
		// different order, which is why equality is expected to fail.
		So(ascClass.Equals(descClass), ShouldBeFalse)
		So(descClass.Equals(ascClass), ShouldBeFalse)
	})
}
================================================
FILE: base/view.go
================================================
package base
import (
"bytes"
"fmt"
)
// InstancesView hides or re-orders Attributes and rows from
// a given DataGrid to make it appear that they've been deleted.
//
// The field order matters: the constructors in this file build the
// struct with unkeyed composite literals.
type InstancesView struct {
// src is the underlying FixedDataGrid that reads are forwarded to.
src FixedDataGrid
// attrs holds the visible AttributeSpecs; nil means no Attribute
// filtering is applied and requests pass through to src.
attrs []AttributeSpec
// rows maps a row index in this view to a row index in src; nil means
// no row mapping is applied.
rows map[int]int
// classAttrs records which Attributes are flagged as class Attributes
// (a false value means the flag has been removed).
classAttrs map[Attribute]bool
// maskRows, when true, hides every row which has no entry in rows.
maskRows bool
}
// addClassAttrsFromSrc flags the class Attributes of src as class
// Attributes of this view. When an Attribute filter is set (v.attrs is
// non-nil), only class Attributes equal to one of the visible Attributes
// are carried over; otherwise all of them are.
func (v *InstancesView) addClassAttrsFromSrc(src FixedDataGrid) {
	for _, classAttr := range src.AllClassAttributes() {
		// Without a filter every class Attribute is visible by definition.
		visible := v.attrs == nil
		for _, spec := range v.attrs {
			if spec.attr.Equals(classAttr) {
				visible = true
			}
		}
		if !visible {
			continue
		}
		v.classAttrs[classAttr] = true
	}
}
// resolveRow translates a row index in this view into a row index in the
// source grid. It returns -1 when row masking is on and the row has no
// mapping; unmapped rows pass through unchanged otherwise.
func (v *InstancesView) resolveRow(origRow int) int {
	if v.rows == nil {
		// No row mapping at all: the view and the source line up.
		return origRow
	}
	if mapped, found := v.rows[origRow]; found {
		return mapped
	}
	if v.maskRows {
		return -1
	}
	return origRow
}
// NewInstancesViewFromRows builds an InstancesView over src using a
// row -> row mapping. Each key of rows is a row index as seen through the
// view and each value is the row of src shown there: an entry 5 -> 1 makes
// row 1 of src appear at row 5 of the InstancesView.
//
// Row masking is off for views built this way, so rows without an entry
// in the mapping appear as normal. All Attributes of src stay visible.
func NewInstancesViewFromRows(src FixedDataGrid, rows map[int]int) *InstancesView {
	view := &InstancesView{
		src:        src,
		attrs:      nil,
		rows:       rows,
		classAttrs: make(map[Attribute]bool),
		maskRows:   false,
	}
	view.addClassAttrsFromSrc(src)
	return view
}
// NewInstancesViewFromVisible builds an InstancesView over src from a
// slice of row numbers and a slice of Attributes.
//
// Row masking is on: only the listed rows are visible, and row i of the
// view shows row rows[i] of src, so the order of the slice is preserved.
//
// Only the listed Attributes are visible, and Attribute specifications
// retrieved from the view keep the order given in attrs.
func NewInstancesViewFromVisible(src FixedDataGrid, rows []int, attrs []Attribute) *InstancesView {
	view := &InstancesView{
		src:        src,
		attrs:      ResolveAttributes(src, attrs),
		rows:       make(map[int]int),
		classAttrs: make(map[Attribute]bool),
		maskRows:   true,
	}
	for viewRow := range rows {
		view.rows[viewRow] = rows[viewRow]
	}
	view.addClassAttrsFromSrc(src)
	return view
}
// NewInstancesViewFromAttrs builds an InstancesView over src in which only
// the given Attributes are visible. No row mapping or masking is applied.
func NewInstancesViewFromAttrs(src FixedDataGrid, attrs []Attribute) *InstancesView {
	view := &InstancesView{
		src:        src,
		attrs:      ResolveAttributes(src, attrs),
		rows:       nil,
		classAttrs: make(map[Attribute]bool),
		maskRows:   false,
	}
	view.addClassAttrsFromSrc(src)
	return view
}
// GetAttribute looks up the AttributeSpec for a, provided a has not been
// filtered out of this view.
//
// With no Attribute filter set the request is forwarded to the source
// FixedDataGrid; otherwise the spec stored for the first visible Attribute
// equal to a is returned. An error is returned when a is nil or when no
// visible Attribute matches.
func (v *InstancesView) GetAttribute(a Attribute) (AttributeSpec, error) {
	switch {
	case a == nil:
		return AttributeSpec{}, fmt.Errorf("Attribute can't be nil")
	case v.attrs == nil:
		// Nothing is filtered, so the source is authoritative.
		return v.src.GetAttribute(a)
	}

	for i := range v.attrs {
		if v.attrs[i].GetAttribute().Equals(a) {
			return v.attrs[i], nil
		}
	}
	return AttributeSpec{}, fmt.Errorf("Requested Attribute has been filtered")
}
// AllAttributes lists the Attributes visible through this view: those of
// the source when no filter is set, otherwise the filtered ones in the
// order they are stored.
func (v *InstancesView) AllAttributes() []Attribute {
	if v.attrs == nil {
		return v.src.AllAttributes()
	}
	visible := make([]Attribute, 0, len(v.attrs))
	for _, spec := range v.attrs {
		visible = append(visible, spec.GetAttribute())
	}
	return visible
}
// AddClassAttribute flags a as a class Attribute of this view. It returns
// an error, and changes nothing, if a does not equal any Attribute that is
// visible through the view.
func (v *InstancesView) AddClassAttribute(a Attribute) error {
	// The Attribute must be one of those reported by AllAttributes.
	known := false
	for _, candidate := range v.AllAttributes() {
		if candidate.Equals(a) {
			known = true
		}
	}
	if known {
		v.classAttrs[a] = true
		return nil
	}
	return fmt.Errorf("Attribute has been filtered")
}
// RemoveClassAttribute removes the given Attribute from the set of
// class Attributes.
//
// The entry is kept in the classAttrs map with a false value rather than
// deleted, and the lookup uses a itself as the map key. The returned
// error is always nil.
func (v *InstancesView) RemoveClassAttribute(a Attribute) error {
v.classAttrs[a] = false
return nil
}
// AllClassAttributes lists the Attributes currently flagged as class
// Attributes. The result is built by ranging over a map, so its order is
// not stable between calls.
func (v *InstancesView) AllClassAttributes() []Attribute {
	classes := []Attribute{}
	for attr, isClass := range v.classAttrs {
		if !isClass {
			// Entries set to false have been removed from the class set.
			continue
		}
		classes = append(classes, attr)
	}
	return classes
}
// Get fetches the bytes stored under the given AttributeSpec at the given
// row of this view, translating the row to the source grid first.
//
// It panics with "Out of range" if the row is hidden by row masking.
//
// IMPORTANT: the AttributeSpec is not checked against the Attribute
// filter, so values of filtered Attributes can still be read by anyone
// holding the underlying AttributeSpec.
func (v *InstancesView) Get(as AttributeSpec, row int) []byte {
	srcRow := v.resolveRow(row)
	if srcRow == -1 {
		panic("Out of range")
	}
	return v.src.Get(as, srcRow)
}
// MapOverRows calls rowFunc once per visible row, see
// DenseInstances.MapOverRows.
//
// With row masking on, the rows are visited by ranging over the row map,
// so the order is effectively random; rowFunc receives the row index as
// seen through the view. Iteration stops early when rowFunc returns false
// or an error, and the error is returned to the caller. The slice handed
// to rowFunc is reused between calls.
//
// With row masking off the call is forwarded to the source grid as-is.
// NOTE(review): on that path any mapping in v.rows is not applied.
func (v *InstancesView) MapOverRows(as []AttributeSpec, rowFunc func([][]byte, int) (bool, error)) error {
	if !v.maskRows {
		return v.src.MapOverRows(as, rowFunc)
	}

	cells := make([][]byte, len(as))
	for viewRow, srcRow := range v.rows {
		for col, spec := range as {
			cells[col] = v.src.Get(spec, srcRow)
		}
		keepGoing, err := rowFunc(cells, viewRow)
		if err != nil {
			return err
		}
		if !keepGoing {
			return nil
		}
	}
	return nil
}
// Size returns the number of Attributes and the number of rows this
// InstancesView contains, in that order.
//
// The Attribute count is the number of visible Attributes when a filter
// is set. The row count is the number of mapped rows when masking is on;
// with masking off it is the larger of the source row count and the
// number of mapped rows.
func (v *InstancesView) Size() (int, int) {
	cols, rows := v.src.Size()

	if v.attrs != nil {
		cols = len(v.attrs)
	}
	if v.rows != nil && (v.maskRows || len(v.rows) > rows) {
		rows = len(v.rows)
	}
	return cols, rows
}
// String renders a human-readable summary of this InstancesView: a header
// with its dimensions and active view settings, the list of Attributes
// (class Attributes are prefixed with '*'), and up to 30 rows of data.
func (v *InstancesView) String() string {
	const displayLimit = 30
	var out bytes.Buffer

	// Resolve every visible Attribute up front; reused for each row below.
	specs := ResolveAllAttributes(v)

	// Header
	cols, rows := v.Size()
	out.WriteString("InstancesView with ")
	fmt.Fprintf(&out, "%d row(s) ", rows)
	fmt.Fprintf(&out, "%d attribute(s)\n", cols)
	if v.attrs != nil {
		out.WriteString("With defined Attribute view\n")
	}
	if v.rows != nil {
		out.WriteString("With defined Row view\n")
	}
	if v.maskRows {
		out.WriteString("Row masking on.\n")
	}

	// Attribute list
	out.WriteString("Attributes:\n")
	for _, spec := range specs {
		marker := "\t"
		if v.classAttrs[spec.attr] {
			marker = "*\t"
		}
		fmt.Fprintf(&out, "%s%s\n", marker, spec.attr)
	}

	// Data, capped at displayLimit rows
	shown := displayLimit
	if rows < shown {
		shown = rows
	}
	out.WriteString("Data:")
	for i := 0; i < shown; i++ {
		out.WriteString("\t")
		for _, spec := range specs {
			cell := v.Get(spec, i)
			fmt.Fprintf(&out, "%s ", spec.attr.GetStringFromSysVal(cell))
		}
		out.WriteString("\n")
	}

	// Footer: say how many rows were left out, if any
	if hidden := rows - shown; hidden != 0 {
		fmt.Fprintf(&out, "\t...\n%d row(s) undisplayed", hidden)
	} else {
		out.WriteString("All rows displayed")
	}
	return out.String()
}
// RowString renders one row of this view as the string forms of its
// visible Attribute values, separated by single spaces.
func (v *InstancesView) RowString(row int) string {
	var out bytes.Buffer
	for i, spec := range ResolveAllAttributes(v) {
		cell := v.Get(spec, row)
		// Every value except the first is preceded by a space.
		sep := " "
		if i == 0 {
			sep = ""
		}
		fmt.Fprintf(&out, "%s%s", sep, spec.attr.GetStringFromSysVal(cell))
	}
	return out.String()
}
================================================
FILE: base/view_test.go
================================================
package base
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestInstancesViewRows builds a row-mapped view of Iris in which view
// row 0 shows source row 5, then checks the stored mapping, the rendered
// row and the reported size.
func TestInstancesViewRows(t *testing.T) {
	Convey("Given Iris", t, func() {
		iris, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldEqual, nil)
		Convey("Given a new row map containing only row 5", func() {
			view := NewInstancesViewFromRows(iris, map[int]int{0: 5})
			Convey("The internal structure should be right...", func() {
				So(view.rows[0], ShouldEqual, 5)
			})
			Convey("The reconstructed values should be correct...", func() {
				expected := "5.4 3.9 1.7 0.4 Iris-setosa"
				So(view.RowString(0), ShouldEqual, expected)
			})
			Convey("And the size should be correct...", func() {
				// Rows are not masked here, so the full height is reported.
				cols, rows := view.Size()
				So(cols, ShouldEqual, 5)
				So(rows, ShouldEqual, 150)
			})
		})
	})
}
// TestInstancesViewFromVisible checks that a view built with
// NewInstancesViewFromVisible shows exactly the requested source rows, in
// the requested order: first for every second row of Iris, then for every
// fourth row.
func TestInstancesViewFromVisible(t *testing.T) {
	Convey("Given Iris", t, func() {
		instOrig, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldEqual, nil)
		Convey("Generate something that says every other row should be visible", func() {
			rowVisiblex1 := make([]int, 0)
			_, totalRows := instOrig.Size()
			for i := 0; i < totalRows; i += 2 {
				rowVisiblex1 = append(rowVisiblex1, i)
			}
			instViewx1 := NewInstancesViewFromVisible(instOrig, rowVisiblex1, instOrig.AllAttributes())
			// Row i of the view must render like source row rowVisiblex1[i].
			for i, a := range rowVisiblex1 {
				rowStr1 := instViewx1.RowString(i)
				rowStr2 := instOrig.RowString(a)
				So(rowStr1, ShouldEqual, rowStr2)
			}
			Convey("And then generate something that says that every other row than that should be visible", func() {
				rowVisiblex2 := make([]int, 0)
				for i := 0; i < totalRows; i += 4 {
					// Accumulate into rowVisiblex2 itself; appending to
					// rowVisiblex1 here would discard all but the last
					// row and re-test the first list instead.
					rowVisiblex2 = append(rowVisiblex2, i)
				}
				instViewx2 := NewInstancesViewFromVisible(instOrig, rowVisiblex2, instOrig.AllAttributes())
				for i, a := range rowVisiblex2 {
					rowStr1 := instViewx2.RowString(i)
					rowStr2 := instOrig.RowString(a)
					So(rowStr1, ShouldEqual, rowStr2)
				}
			})
		})
	})
}
// TestInstancesViewAttrs builds an Attribute-filtered view of Iris which
// drops the first Attribute, then checks the view's size, Attribute list,
// class Attribute, row rendering and equality with a reference dataset.
func TestInstancesViewAttrs(t *testing.T) {
Convey("Given Iris", t, func() {
instOrig, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
So(err, ShouldEqual, nil)
Convey("Given a new Attribute vector with the last 4...", func() {
// Everything except the first Attribute stays visible.
cMap := instOrig.AllAttributes()[1:]
instView := NewInstancesViewFromAttrs(instOrig, cMap)
Convey("The size should be correct", func() {
// Filtering Attributes must not change the number of rows.
h, v := instView.Size()
So(h, ShouldEqual, 4)
_, vOrig := instOrig.Size()
So(v, ShouldEqual, vOrig)
})
Convey("There should be 4 Attributes...", func() {
attrs := instView.AllAttributes()
So(len(attrs), ShouldEqual, 4)
})
Convey("There should be 4 Attributes with the right headers...", func() {
attrs := instView.AllAttributes()
So(attrs[0].GetName(), ShouldEqual, "Sepal width")
So(attrs[1].GetName(), ShouldEqual, "Petal length")
So(attrs[2].GetName(), ShouldEqual, "Petal width")
So(attrs[3].GetName(), ShouldEqual, "Species")
})
Convey("There should be a class Attribute...", func() {
attrs := instView.AllClassAttributes()
So(len(attrs), ShouldEqual, 1)
})
Convey("The class Attribute should be preserved...", func() {
attrs := instView.AllClassAttributes()
So(attrs[0].GetName(), ShouldEqual, "Species")
})
Convey("Attempts to get the filtered Attribute should fail...", func() {
// The first source Attribute is the one the view hides.
_, err := instView.GetAttribute(instOrig.AllAttributes()[0])
So(err, ShouldNotEqual, nil)
})
Convey("The filtered Attribute should not appear in the RowString", func() {
str := "3.9 1.7 0.4 Iris-setosa"
row := instView.RowString(5)
So(row, ShouldEqual, str)
})
Convey("The filtered Attributes should all be the same type...", func() {
attrs := instView.AllAttributes()
_, ok1 := attrs[0].(*FloatAttribute)
_, ok2 := attrs[1].(*FloatAttribute)
_, ok3 := attrs[2].(*FloatAttribute)
_, ok4 := attrs[3].(*CategoricalAttribute)
So(ok1, ShouldEqual, true)
So(ok2, ShouldEqual, true)
So(ok3, ShouldEqual, true)
So(ok4, ShouldEqual, true)
})
Convey("The InstancesView should match one prepared earlier...", func() {
instRef, err := ParseCSVToInstances("../examples/datasets/iris_headers_subset.csv", true)
So(err, ShouldBeNil)
So(InstancesAreEqual(instRef, instView), ShouldBeTrue)
Convey("And a DenseInstances conversion should too...", func() {
// NOTE(review): this copies instRef and compares the copy with
// instRef, so instView is not involved in the check. The name
// instView2 suggests NewDenseCopy(instView) may have been
// intended - confirm before changing.
instView2 := NewDenseCopy(instRef)
So(InstancesAreEqual(instRef, instView2), ShouldBeTrue)
})
})
})
})
}
================================================
FILE: clustering/cluster_extra_test.go
================================================
package clustering
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// Test exercises ClusterMap.Equals on small hand-built maps: equal maps
// must compare equal without error, and maps that differ in their points,
// their cluster sizes, or that list a point twice must produce an error.
func Test(t *testing.T) {
	Convey("Only m[0]", t, func() {
		m1 := ClusterMap(make(map[int][]int))
		m1[0] = []int{1, 2}
		m2 := ClusterMap(make(map[int][]int))
		m2[0] = []int{1, 2}
		ret, err := m1.Equals(m2)
		So(err, ShouldBeNil)
		So(ret, ShouldBeTrue)
	})
	Convey("Nothing in m", t, func() {
		m1 := ClusterMap(make(map[int][]int))
		m2 := ClusterMap(make(map[int][]int))
		ret, err := m1.Equals(m2)
		So(err, ShouldBeNil)
		So(ret, ShouldBeTrue)
	})
	Convey("Many elements in m", t, func() {
		m1 := ClusterMap(make(map[int][]int))
		m1[0] = []int{1, 2, 3, 4, 5}
		m1[1] = []int{11, 12, 13, 14, 15}
		m2 := ClusterMap(make(map[int][]int))
		m2[0] = []int{1, 2, 3, 4, 5}
		m2[1] = []int{11, 12, 13, 14, 15}
		ret, err := m1.Equals(m2)
		So(err, ShouldBeNil)
		So(ret, ShouldBeTrue)
	})
	Convey("m[0] not the same", t, func() {
		m1 := ClusterMap(make(map[int][]int))
		m1[1] = []int{1, 2, 3}
		m1[0] = []int{4, 5}
		m2 := ClusterMap(make(map[int][]int))
		m2[1] = []int{1, 2, 3}
		m2[0] = []int{6, 5}
		_, err := m1.Equals(m2)
		So(err, ShouldNotBeNil)
	})
	Convey("m[0] size diff", t, func() {
		// m2 deliberately has no cluster 0 at all.
		m1 := ClusterMap(make(map[int][]int))
		m1[1] = []int{1, 2, 3}
		m1[0] = []int{4, 5}
		m2 := ClusterMap(make(map[int][]int))
		m2[1] = []int{1, 2, 3}
		_, err := m1.Equals(m2)
		So(err, ShouldNotBeNil)
	})
	Convey("m[1] size diff", t, func() {
		// Both maps have two clusters; only the size of cluster 1 differs.
		m1 := ClusterMap(make(map[int][]int))
		m1[1] = []int{1, 3}
		m1[0] = []int{4, 5}
		m2 := ClusterMap(make(map[int][]int))
		m2[1] = []int{1, 2, 3}
		m2[0] = []int{4, 5}
		_, err := m1.Equals(m2)
		So(err, ShouldNotBeNil)
	})
	Convey("m[1] duplicate", t, func() {
		// Point 1 is listed twice in cluster 1 of both maps.
		m1 := ClusterMap(make(map[int][]int))
		m1[1] = []int{1, 1}
		m1[0] = []int{4, 5}
		m2 := ClusterMap(make(map[int][]int))
		m2[1] = []int{1, 1}
		m2[0] = []int{4, 5}
		_, err := m1.Equals(m2)
		So(err, ShouldNotBeNil)
	})
	Convey("m[0] duplicate", t, func() {
		// Point 4 is listed twice in cluster 0 of both maps.
		m1 := ClusterMap(make(map[int][]int))
		m1[1] = []int{1, 2}
		m1[0] = []int{4, 4}
		m2 := ClusterMap(make(map[int][]int))
		m2[1] = []int{1, 2}
		m2[0] = []int{4, 4}
		_, err := m1.Equals(m2)
		So(err, ShouldNotBeNil)
	})
}
================================================
FILE: clustering/cluster_test.go
================================================
package clustering
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
// TestClusterEquality checks that ClusterMap.Equals treats two maps as
// equal when their clusters contain the same points, whatever the cluster
// labels are, and reports an error when clusters or points are missing or
// when a map lists a point more than once.
func TestClusterEquality(t *testing.T) {
	Convey("Should be able to determine if two cluster maps represent the same thing...", t, func() {
		Convey("When everything's exactly the same...", func() {
			m1 := ClusterMap(make(map[int][]int))
			m1[0] = []int{1, 2, 3}
			m1[1] = []int{4, 5}
			m2 := ClusterMap(make(map[int][]int))
			m2[0] = []int{1, 2, 3}
			m2[1] = []int{4, 5}
			ret, err := m1.Equals(m2)
			So(err, ShouldBeNil)
			So(ret, ShouldBeTrue)
		})
		Convey("With re-labelled clusters...", func() {
			// Same partition of the points, but labels 0 and 1 are swapped
			// between the two maps.
			m1 := ClusterMap(make(map[int][]int))
			m1[1] = []int{1, 2, 3}
			m1[0] = []int{4, 5}
			m2 := ClusterMap(make(map[int][]int))
			m2[0] = []int{1, 2, 3}
			m2[1] = []int{4, 5}
			ret, err := m1.Equals(m2)
			So(err, ShouldBeNil)
			So(ret, ShouldBeTrue)
		})
		Convey("With missing clusters...", func() {
			m1 := ClusterMap(make(map[int][]int))
			m1[1] = []int{1, 2, 3}
			m2 := ClusterMap(make(map[int][]int))
			m2[1] = []int{1, 2, 3}
			m2[0] = []int{4, 5}
			_, err := m1.Equals(m2)
			So(err, ShouldNotBeNil)
		})
		Convey("With missing points...", func() {
			// Point 2 is absent from m1.
			m1 := ClusterMap(make(map[int][]int))
			m1[1] = []int{1, 3}
			m1[0] = []int{4, 5}
			m2 := ClusterMap(make(map[int][]int))
			m2[1] = []int{1, 2, 3}
			m2[0] = []int{4, 5}
			_, err := m1.Equals(m2)
			So(err, ShouldNotBeNil)
		})
		Convey("With invalid maps...", func() {
			// Point 4 is listed twice in m1, which makes it invalid.
			m1 := ClusterMap(make(map[int][]int))
			m1[0] = []int{1, 2, 3}
			m1[1] = []int{4, 4, 5}
			m2 := ClusterMap(make(map[int][]int))
			m2[0] = []int{1, 2, 3}
			m2[1] = []int{4, 5}
			_, err := m1.Equals(m2)
			So(err, ShouldNotBeNil)
		})
	})
}
================================================
FILE: clustering/clustering.go
================================================
// Package clustering implements clustering algorithms.
package clustering
import (
"fmt"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/metrics/pairwise"
)
// ClusterParameters holds a number of variables common to all clustering
// algorithms.
type ClusterParameters struct {
// Attributes represents the set of Attributes which
// can be used for clustering.
Attributes []base.Attribute
// Metric is used to compute the pairwise distance between points.
Metric pairwise.PairwiseDistanceFunc
}
// ClusterMap contains the cluster identifier as a key, followed by a vector of point
// indices that cluster contains.
type ClusterMap map[int][]int

// Invert returns an alternative form of cluster map where the key represents the point
// index and the value represents the cluster index it's assigned to.
//
// It returns an error if any point index is listed more than once, whether
// in two different clusters or twice within the same cluster.
func (ref ClusterMap) Invert() (map[int]int, error) {
	ret := make(map[int]int)
	for c, points := range ref {
		for _, p := range points {
			if _, ok := ret[p]; ok {
				return nil, fmt.Errorf("Not a valid cluster map (points appear in more than one cluster)")
			}
			ret[p] = c
		}
	}
	return ret, nil
}

// Equals checks whether a bijection exists between two ClusterMaps (i.e. the clusters in one can
// be re-labelled to become the clusters of another).
//
// It returns (true, nil) when both maps have the same number of clusters,
// contain exactly the same points, and group those points identically up
// to a renaming of the cluster identifiers. In every other case it returns
// false together with an error describing the first difference found; an
// error is also returned if either map lists a point more than once.
func (ref ClusterMap) Equals(other ClusterMap) (bool, error) {
	if len(ref) != len(other) {
		return false, fmt.Errorf("ref and other do not contain the same number of clusters (%d and %d)", len(ref), len(other))
	}

	refInv, err := ref.Invert()
	if err != nil {
		return false, fmt.Errorf("ref: %s", err)
	}
	otherInv, err := other.Invert()
	if err != nil {
		return false, fmt.Errorf("other: %s", err)
	}

	// Track the cluster correspondence in both directions. Checking only
	// one direction would accept a ref cluster that is split across
	// several clusters in other.
	otherToRef := make(map[int]int, len(other))
	refToOther := make(map[int]int, len(ref))
	for p, c1 := range refInv {
		c2, ok := otherInv[p]
		if !ok {
			return false, fmt.Errorf("failed to find reference point %d in src", p)
		}
		if c3, seen := otherToRef[c2]; seen && c3 != c1 {
			return false, fmt.Errorf("point %d is in ref cluster %d, but its cluster in other (%d) already corresponds to ref cluster %d", p, c1, c2, c3)
		}
		if c4, seen := refToOther[c1]; seen && c4 != c2 {
			return false, fmt.Errorf("point %d is in other cluster %d, but its cluster in ref (%d) already corresponds to other cluster %d", p, c2, c1, c4)
		}
		otherToRef[c2] = c1
		refToOther[c1] = c2
	}

	// Every ref point was found in other, so any difference in the number
	// of points means other holds points which ref does not.
	if len(otherInv) != len(refInv) {
		return false, fmt.Errorf("other contains %d point(s) which do not appear in ref", len(otherInv)-len(refInv))
	}

	// The points match and the non-empty clusters are in one-to-one
	// correspondence; the equal cluster counts checked at the top cover
	// any empty clusters.
	return true, nil
}
================================================
FILE: clustering/dbscan.csv
================================================
0.494260967249,1.45106696541
-1.42808099324,-0.83706376669
0.338559182384,1.03875870939
0.119001013781,-1.05397553336
1.12242460445,1.77493654436
-1.26156989707,0.271881354299
-1.30154774626,-0.762062025148
0.585698651521,-0.339104628157
1.08247212014,0.886855396912
1.01416667809,1.34114022391
-1.21578195893,-0.601021238858
-1.25021782593,-1.05761650335
-1.05160415572,-0.780084156141
1.15263449272,-0.648539905918
-0.783299140581,-1.2248966985
0.202587147419,1.61104848936
-1.43020789851,-1.82380067733
-0.916300845616,-0.480830396598
-0.506013825832,-0.295715454174
0.436426179395,-1.06597144351
0.468034167368,-0.974110220304
0.522354793098,-0.641695891625
0.94533367495,-0.543880951202
0.94661473578,-0.939854758443
-1.38551398913,-0.73950655252
-1.15374916281,-0.250507932367
0.493572698047,-0.949825244593
0.884913340754,1.66591701207
0.249587300835,1.57229126004
1.02800263162,-0.340081504198
0.478275464063,1.19798226443
-1.19268844384,-0.510240121174
-1.85804701232,-1.33021784213
0.528139618545,1.32892750576
-0.918024481532,-0.652157357893
0.756316701741,0.920633635328
0.855048505014,-0.481028310004
0.492824086051,1.78274421923
0.380510951332,1.24884772379
-0.166999182256,-0.0916528008137
0.862512958934,-0.29122649879
-1.28326220483,-0.63402691263
-1.46013480318,-0.722834729597
-1.48000289758,-1.09948040102
-2.19020872323,-0.630588973627
-1.07505211635,-0.474050249508
0.541969904427,1.03090707759
0.824488329821,-0.264039880782
0.456263169078,2.05788223562
-1.58709404439,-0.54480731903
1.32708272612,-0.345071514843
0.68614239282,-0.490086592009
-1.60725507262,0.070747440379
-1.53337705952,-0.570087546452
1.0491125845,-0.574435960384
0.731933094085,-0.608068176075
-1.13848133348,-0.0659881431468
1.36805202458,1.65962813336
0.222462580182,-0.65053906069
-1.18662195919,-0.78239641499
0.357717455186,-0.584924154569
0.588086269107,-0.230283609581
0.78242146637,-0.380417760077
1.2682093931,-0.857019912656
0.549567992097,-0.773931305337
0.981410379535,1.01828533931
0.707839055866,-0.233211620345
0.0165651739637,-0.923844177798
0.158530593126,1.68427935414
0.498933328512,1.18944226235
0.394392460137,1.10697668799
0.52298152277,-0.915281143053
0.363168115217,1.90748256868
0.346568780252,1.26411862836
0.966039504954,-0.4318119363
-1.14222916165,-0.398461611165
-0.134479180583,2.11039748445
-1.18845711973,0.191151161919
0.235515043844,1.71737552151
0.648790787207,-0.936837517765
-1.58852748366,-0.819181976895
-1.04572997888,-0.29002720873
0.467505726335,0.450459334368
0.0198833944692,1.48714816824
0.189992256516,1.10986299053
1.48201717596,1.82713555691
-1.30489683944,-1.15150866165
0.757809431355,-0.47686276961
-1.54387743826,-0.684212390528
0.53240786142,-0.776648241672
0.85665850455,-1.34594223446
0.403144558116,1.57028295161
-1.3011171994,-0.790729653327
0.972620490761,1.21000471162
-1.00025584409,-0.628924362444
1.22425496262,0.501610912038
-1.15175818324,0.22764659828
-1.31816425788,-0.630999410835
0.402531346597,1.15248839326
0.0906743459729,1.61848052292
0.598794476009,-0.744251645998
-1.37198702139,-0.980218172223
0.520218965558,-0.919220905523
0.631969327359,1.19544068432
0.728113832873,-0.518758002884
0.262658464722,0.0128713235313
1.01826270251,-0.800567265699
1.0896513853,-0.503675186289
1.53624088423,0.894604885123
0.511997776458,0.678078694437
1.68745105198,1.27830755696
-1.84237360674,-0.904437839063
-1.19200811061,-0.463511666939
-1.29275263692,0.287881967384
-2.03126575898,-0.895274949124
0.529118462695,0.654914838633
0.468283787666,0.755733587995
-1.638498618,-0.154707320244
0.605617236401,1.70403704905
-0.767697521224,-1.01384394922
0.421112557426,-0.813005680016
-1.1727392859,-0.0801023370369
0.763176137366,1.82318913399
-0.0334381403655,1.44539596918
-1.60758525806,-0.62956732394
0.72250888945,-0.367506703588
-1.48527973153,-0.62861576205
0.978478897202,1.05374904006
0.451784483015,1.13661154122
1.27710347995,-0.491509617737
0.7166105877,1.15073382716
0.705050630765,-1.01884736371
0.535813899767,-1.31595906212
0.279302786611,-1.16319317603
0.29795190705,1.14196446938
-1.5319923175,-1.74146843932
0.485447620689,-0.597755525309
0.407314491616,-0.790408883348
0.381481488856,1.90489980312
-1.60594123991,-0.76522411796
1.23408760826,1.97619040399
0.909343480925,-0.618337223907
0.495887533633,0.855925046745
0.793312516951,0.879279610882
0.346669837831,-0.395258378353
0.463120268974,-0.842105995666
-0.422275985459,-0.190344559422
0.938840781419,-0.223971270792
-1.58434365981,-0.845357036129
1.52307352239,0.741157517894
0.473203974657,-0.605056119142
-1.33430726419,-0.787153064395
-1.30774613959,-0.537830906671
0.44437726176,-0.570907450386
0.302728842099,-1.4022293954
0.498614426707,-0.661820178158
1.02546663264,2.17903746819
-0.888963724459,-0.894519799863
-0.0094375858741,2.06614833436
-1.259326547,-1.33666248485
0.334806319729,0.635350614538
-2.0514671874,-0.491853069487
0.511781097662,0.772058829646
0.635381289585,-1.23415961512
0.840452136147,-0.925641488461
-0.46307453491,-1.26531794688
-1.37224990492,-0.0477233997811
0.128494145161,-0.146277558271
0.629212436152,-0.545489790799
-1.28799441742,-0.218570654523
0.638847594716,1.0198939832
-1.90824567176,-1.24854294321
0.983925587407,-0.980132673476
0.751915912284,-0.434247990685
0.246162045698,-0.972003120401
-1.42184967713,-1.00645441438
-1.36258687372,-0.465192195174
0.729107773809,-1.12124670875
1.28828508776,-1.18972269812
0.936218595433,0.844436650383
-1.41967242002,-1.33553338128
0.451293435185,-0.337043043077
0.889211776584,0.683688380936
0.946264899744,0.846407250351
0.516908027375,-1.13002059107
0.663113490975,0.662420359006
0.985803048039,1.26228271875
-1.4124239618,-0.947706065026
0.642179325842,1.36969227279
-1.32320503558,-0.518361624408
0.389031988291,1.16716527963
-0.806854584638,-0.613264833433
-0.73049432945,-0.484378149065
0.493548378749,-0.761716569457
0.118175433165,-0.443557808199
1.00315780403,1.4310943891
0.778850340762,2.09349071844
-0.745033802864,-0.756441323796
-0.93389892072,-0.103482424997
0.68196176411,-0.273220993773
-1.16459401764,-0.315541399223
0.740399605464,-0.0945591684424
0.856407754419,0.252753351451
0.803410992909,-1.32952562448
0.429896355505,-0.758228537429
0.595823625156,1.74945400458
1.02085295004,-0.440804557414
0.30307695482,1.45762223084
1.18958904168,-0.581519032443
0.96915905519,-0.511234999414
0.697140552761,1.46349275366
0.637227696862,-0.764858659877
1.35045914484,-0.667938023256
0.250651256786,-1.19493208012
1.28347766291,1.37097619103
-0.128975958296,1.09716295281
-1.7517528787,-0.262053681114
-1.3635857203,-1.06031600728
-0.904113999203,0.191818430248
0.165426717861,-0.866647109384
0.232203921427,-0.682948158472
0.350368147923,-0.295280019807
1.5427482888,-0.592939512519
-1.13795423209,-0.133498274187
0.674237889386,-0.632143914378
0.334556478351,-1.20237442694
0.528396459186,1.34497258643
0.268370506258,0.734359941775
0.309361881005,-0.728426362716
0.917435744228,1.30854004814
0.428789300542,1.41209652083
0.199130767118,1.65759766562
-1.17444696491,-0.950375612201
0.597006581866,1.19119789824
-1.45170622969,-0.891168308477
-1.41986354849,-0.273475605125
-1.57409699552,-0.422236366569
1.04184264467,-0.362737479132
-2.14219480292,-0.482272076783
-1.50691533211,-0.200973148817
0.0543420665276,1.33168891813
-1.13144663461,-0.651825483298
1.07155174333,-0.692136570485
0.583387651839,-0.491450887858
-1.14297733022,-0.697948095468
0.0998245638451,0.10950372489
0.220588982913,-0.851548705937
-1.13730048755,-0.564448259501
0.905073179513,1.12779984735
0.72504167988,1.28738215218
-1.06955320593,-0.467663188307
-0.880265370005,-1.02614239598
-1.44264764226,-0.96145282057
1.01333072504,1.24675601661
-1.0093984377,-1.05143861237
0.507657052315,1.36804853004
1.26502785776,-0.711979714262
1.31608042094,1.5734222567
0.334632982453,-0.84147974129
0.802031438762,0.228215838939
1.38250775401,-0.644251339858
0.919614961822,-1.22049235391
0.929729151417,-0.208693463261
-1.53633104344,-0.511275317046
-0.665051865958,-0.739115745001
-0.335795516652,1.56140541417
1.23901518412,1.87882199622
-1.35543673912,-0.601849685925
-1.15154941392,-0.269135444753
0.608439338548,1.46684269694
1.06006794863,1.13065360895
0.942890187819,-0.742929110414
-1.15672050041,-0.436145800526
1.62198216506,0.050201317777
0.854125246175,-0.514807506009
-1.14337683511,-0.490935142717
-1.51048251847,-0.0345004965754
0.880530249926,-0.869888336327
-1.36540418059,-0.756111150943
0.601814512111,-1.21412505961
-0.0621652593321,1.12108597614
0.74067770872,-0.576648130759
-0.183577853633,-0.125433577503
0.417995488425,1.21449387096
-1.1856447963,-0.984315517908
1.07887574968,-0.840413058707
0.090657698723,-1.25434772582
0.0261662265887,1.22429234588
1.13673243898,-0.444139145222
1.23361139042,-1.09421718393
0.351468885092,1.51690258534
0.255831769187,1.27677830087
0.798195414423,-0.18283188485
1.31845143924,1.69400632284
0.938052607202,-0.419433668128
0.388310366276,1.31945848095
1.00904356759,-0.374533562373
-1.08675207316,-0.230719819714
0.956791915728,1.33752493245
0.964894172999,1.3091321864
0.630607763963,1.39287553367
-1.41288695181,-0.864681477113
0.261119656155,-1.02691248837
-0.882375409513,-0.666629249983
0.989911346176,-0.744391801077
0.867329484559,-0.768003291115
1.10613565156,1.4303998032
0.77134497925,-0.692113237484
0.343526184216,-0.991545218203
0.758591550569,1.54398289162
0.707946435833,1.45422137588
0.709604992056,-1.40060170714
-1.62485869339,-0.127799648835
-1.66703749341,0.0158250976471
-1.80730926772,-0.301662933271
-1.45291560869,-0.535118179264
-1.4701829607,-0.667609031391
0.826731842161,1.41567303436
-1.83590114306,-1.10954151061
-1.6332275232,-0.563497927722
-0.7388346936,-0.798186938046
-1.82702823377,0.13893299319
1.08739214482,0.826583726311
0.196057452318,2.06336452546
-0.962783057941,-0.109325188026
-1.19668293625,-1.1087752111
-0.920351459366,-0.706719513233
1.1741662534,1.0387978517
0.489318601459,-0.795493247886
-0.0285631715351,1.48253801626
-1.55996778776,-0.562017909444
0.0907181454452,-0.814517495862
1.04873107616,-0.452078258313
0.641663493277,1.45460629445
0.396805058072,1.10427025972
1.00336963075,-0.459191567668
0.907351763777,1.46562217387
0.904912861981,-1.62473397987
-1.30060206226,-0.639040245494
0.22255248672,1.32737094419
0.41209455966,-0.958675990971
0.941556677173,1.35441829013
1.28361991963,-1.24163477985
-0.376722258575,1.54300064517
0.930527863539,-0.784505897599
1.05101554226,-0.405406154061
1.22185277774,2.04479129366
-1.10897541444,-0.568930353083
0.637361305672,1.47374301327
-0.735046904585,-0.332733398991
0.914105951171,1.81364038611
0.815815323504,-0.428342552091
0.655466878695,-0.869548902941
-1.1045597651,-0.600408464946
-0.915703222184,-0.742626383383
-1.3571704177,-0.68125832152
0.69160775897,-0.893583583689
0.978900301359,1.75109237406
0.53683021324,-1.41620152234
1.09237619762,1.72716832141
0.866591909179,-0.581572078316
-1.80307744469,-0.65461097373
-0.127231346916,-0.409038899099
0.541525702451,-0.201173106705
0.68589072527,1.53390864901
-0.502670916098,-0.757868411152
0.417479823257,0.872860696972
-2.0289141946,-0.993678879688
0.245343426191,1.77834730722
0.316274690117,2.05030729845
1.23151797851,1.52230461678
0.488799329286,1.01622700328
0.736124228521,-0.560102473907
0.0380991755979,1.54458039477
0.348282296735,0.0373035505291
0.791153859839,1.36235109152
-1.89637476785,-0.983716547448
0.529079350094,1.21622740397
-1.2345838948,-0.786033236307
0.206511679327,-0.620187190429
-1.25908731883,-0.301031125224
-1.09843278784,0.0369549195008
-1.10406146313,-1.35048039511
0.983155368445,1.41480769807
-1.7328692309,-1.08216857053
-0.917910107541,-0.0889436794991
0.312585483993,1.0818337627
-0.0811644021867,-0.707691032276
-1.20266214326,-0.217504289139
0.454419137278,2.2457941917
0.471831725992,-0.493824106953
1.29161652352,-0.520992830994
-1.25588057463,-0.721197168795
-1.20377898567,-1.33173379489
1.11899200093,-0.713538916105
0.339906689497,-0.72413604985
0.615417018996,-0.858079193557
-1.01823258109,-0.78714664658
0.816099854449,-0.871668345031
-1.7212991458,-0.777848794878
0.843019145714,-0.498712137992
1.4021067635,1.45886382804
0.878294256485,-1.02266917785
-0.88512932828,-0.853503063368
0.430259456368,-0.453270444086
-1.77952949337,-0.141961490527
0.849914524615,1.24032152147
-1.32980886649,-0.481002489736
0.624470649758,1.26531866728
-1.06157593269,-1.13833962673
-1.3992137138,-0.965470741462
0.896181657602,0.695919911938
-1.418340371,-0.224255463115
0.0738188763056,-0.0563312160229
1.01170961883,0.241023782153
-1.5363281273,0.0159593515193
0.82770781377,0.709297571031
0.545029125045,0.868146825735
0.94527049937,-0.689257336931
-1.19201851393,-0.0979642908923
0.356642444398,-0.521177720048
1.25677847275,-0.948042349321
0.960112654402,-1.1046969869
0.467333609641,-0.297755148203
-1.09928800088,-0.782568121394
0.499876498504,1.34378633999
-0.0980920351721,1.38052928695
-0.233897355292,1.40492904943
0.951304495882,1.12558216168
-1.57107850167,-0.657989767628
0.284198318557,1.14751633136
1.14780923861,-0.398627857264
-1.63748393741,-0.707992965283
0.396760739464,1.1549469915
-0.856392511462,-0.729638141622
0.743336814006,-0.0447286202516
0.213902305912,1.02275520522
0.866879045866,1.22042656018
-0.88179618297,-1.43514524119
0.334722303045,0.736465317357
-1.71828945714,-0.333062709029
-0.918042667376,-0.843035843758
0.929243026125,1.35726190001
-0.431851673719,-1.10093484648
0.703743675795,1.87295209701
0.98717412056,-0.391248211672
0.446786417845,-0.232663277488
0.833397671467,-1.01523684003
-1.31380292373,-0.106348966316
-1.98210412488,-0.520364529607
0.882630413465,-0.204652953696
0.57473870386,1.15343094618
-1.64296177795,-0.545851844001
0.812520126446,1.57046768
-0.221156389297,0.90920018435
-1.31918421048,-1.02294749184
0.756117389326,1.26888096925
-1.00145716326,-1.06765844508
-1.16012367924,-1.17473398971
0.140325452005,-0.427986994764
0.5813642278,-0.83696135172
-0.31645030278,-1.51218920885
0.82452917064,0.93172792002
-0.750534982503,-0.836888860558
0.968658108542,-0.448623907721
1.2006923499,-0.475696442665
-1.26717115594,-0.665599874339
-1.82087781658,-0.868101472932
-1.16838236627,-1.54147890288
-0.981140298879,-1.28505380627
0.141023068843,1.12746333408
0.754032847532,0.960404487137
0.202135095167,1.18555519975
0.849908773169,-0.847682954547
0.744968023152,-0.228079376425
-1.91222754219,-0.796509854232
0.775623691917,-0.695029747499
-0.767188336951,-0.677911431003
0.712466108841,1.55417287552
1.21349899534,1.6388133243
-1.0869979326,-0.648693092282
0.699067612971,-1.40916870622
-1.53255598882,-0.261494722161
1.38939876357,1.88316296941
0.596690144163,1.72643881439
0.804964907977,-0.170902873462
0.40613498617,1.1198979641
-1.20807438507,-0.788501079273
0.728500901715,1.68709745134
0.316645956769,-0.510754409208
-0.823618040446,-0.884384414857
1.01442400059,1.24817740818
0.688659017161,-0.58639380357
0.370731358867,-0.986204337596
-1.02050291971,-0.913802249095
1.07231521798,1.81215231098
0.293755472217,0.389904123007
0.384580005797,1.95282853017
0.731079718128,-0.600671861978
-1.27084815866,-0.599802102819
-1.5506697485,-0.37391302332
0.819305570722,1.43691036146
0.758463908179,-0.257726277971
1.00739359449,1.43935814903
0.296387422059,1.74172031876
-1.56792541994,-0.625734935299
-1.58294937352,-0.212561302929
-1.48429016855,-0.214074430447
-1.57271416628,-0.983949703014
0.535738594277,1.01076484292
-1.47375056852,-0.955937874772
0.568475265758,1.64956338847
-0.862162831203,-0.884179051907
0.544925120741,1.6193204064
0.480499087021,2.02664864155
0.122038139573,0.119143611341
1.08322686266,1.50007405277
-1.26363865114,-1.24824215223
-1.09515150213,-0.580737374373
0.745663888861,-0.797265870367
-0.704911858139,-0.435654296496
-1.08345708839,-0.683728002502
-0.159115840147,1.35521476836
-0.834099861805,-0.571377281807
0.803301570929,1.04060299172
0.882227724909,-1.04635993234
-1.42356222195,-1.11563240162
0.598075641758,1.34363133224
1.00649041199,1.53362494993
-1.74840606346,-0.757167172502
0.665860879827,1.23423673133
-2.27447426719,-1.08752048002
-1.48420811929,-0.38750074543
0.710494890905,-0.0301573663517
0.2452388989,-1.06063486305
-1.30030123852,-0.741203235798
0.722560907798,-1.0887138629
0.845890473528,-0.765476650879
-0.987808045599,-0.300980235798
0.798685296365,-0.020380
gitextract_hhyq2fyg/
├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE.md
├── README.md
├── base/
│ ├── arff.go
│ ├── arff_test.go
│ ├── attributes.go
│ ├── attributes_test.go
│ ├── bag.go
│ ├── bag_test.go
│ ├── binary.go
│ ├── categorical.go
│ ├── classifier.go
│ ├── conversion.go
│ ├── csv.go
│ ├── csv_test.go
│ ├── data.go
│ ├── dataframe_go.go
│ ├── dense.go
│ ├── dense_test.go
│ ├── domain.go
│ ├── error.go
│ ├── error_test.go
│ ├── filewrapper.go
│ ├── filtered.go
│ ├── filters.go
│ ├── fixed.go
│ ├── float.go
│ ├── group.go
│ ├── lazy_sort_test.go
│ ├── logger.go
│ ├── mat.go
│ ├── mat_test.go
│ ├── serialize.go
│ ├── serialize_attributes.go
│ ├── serialize_instances.go
│ ├── serialize_test.go
│ ├── sort.go
│ ├── sort_test.go
│ ├── spec.go
│ ├── util.go
│ ├── util_attributes.go
│ ├── util_instances.go
│ ├── util_test.go
│ ├── view.go
│ └── view_test.go
├── clustering/
│ ├── cluster_extra_test.go
│ ├── cluster_test.go
│ ├── clustering.go
│ ├── dbscan.csv
│ ├── dbscan.go
│ ├── dbscan_labels.csv
│ ├── dbscan_test.go
│ ├── em.go
│ ├── em_test.go
│ ├── gaussian_mixture.csv
│ ├── gaussian_mixture.py
│ ├── gaussian_mixture_labels.csv
│ ├── gaussian_mixture_single_obs.csv
│ ├── gen_test.py
│ └── synthetic.csv
├── coverage.sh
├── doc/
│ ├── zh_CN/
│ │ ├── AddingAttributes.md
│ │ ├── AttributeSpecifications.md
│ │ ├── CSVFiles.md
│ │ ├── Classification/
│ │ │ ├── KNN.md
│ │ │ ├── Regression.md
│ │ │ ├── Trees.md
│ │ │ └── liblinear.md
│ │ ├── Contributing.md
│ │ ├── CustomDataGrids.md
│ │ ├── Filtering.md
│ │ ├── FloatAttributePrecision.md
│ │ ├── Home.md
│ │ ├── Installation.md
│ │ └── Instances.md
│ └── zh_TW/
│ ├── AddingAttributes.md
│ ├── AttributeSpecifications.md
│ ├── CSVFiles.md
│ ├── Classification/
│ │ ├── KNN.md
│ │ ├── Regression.md
│ │ ├── Trees.md
│ │ └── liblinear.md
│ ├── Contributing.md
│ ├── CustomDataGrids.md
│ ├── Filtering.md
│ ├── FloatAttributePrecision.md
│ ├── Home.md
│ ├── Installation.md
│ └── Instances.md
├── ensemble/
│ ├── ensemble.go
│ ├── multisvc.go
│ ├── multisvc_test.go
│ ├── randomforest.go
│ └── randomforest_test.go
├── evaluation/
│ ├── confusion.go
│ ├── confusion_test.go
│ ├── cross_fold.go
│ └── cross_fold_test.go
├── examples/
│ ├── averageperceptron/
│ │ └── averageperceptionexample.go
│ ├── crossfold/
│ │ └── rf.go
│ ├── datasets/
│ │ ├── articles.csv
│ │ ├── boston_house_prices.csv
│ │ ├── c45-numeric.csv
│ │ ├── chim.csv
│ │ ├── exam.csv
│ │ ├── exams.csv
│ │ ├── gaussian_outliers.csv
│ │ ├── house-votes-84.csv
│ │ ├── iris.arff
│ │ ├── iris.csv
│ │ ├── iris_binned.csv
│ │ ├── iris_headers.csv
│ │ ├── iris_headers_subset.csv
│ │ ├── iris_sorted_asc.csv
│ │ ├── iris_sorted_desc.csv
│ │ ├── mnist_test.csv
│ │ ├── mnist_train.csv
│ │ ├── randomdata.csv
│ │ ├── sources.txt
│ │ ├── tennis.csv
│ │ ├── titanic.csv
│ │ └── weather.arff
│ ├── instances/
│ │ └── instances.go
│ ├── knnclassifier/
│ │ └── knnclassifier_iris.go
│ ├── serialization/
│ │ └── attributes.go
│ └── trees/
│ ├── cart/
│ │ └── cart.go
│ ├── id3/
│ │ └── trees.go
│ └── isolationForest/
│ └── isolation_forest.go
├── filters/
│ ├── binary.go
│ ├── binary_test.csv
│ ├── binary_test.go
│ ├── binning.go
│ ├── binning_test.go
│ ├── chimerge.go
│ ├── chimerge_freq.go
│ ├── chimerge_funcs.go
│ ├── chimerge_test.go
│ ├── disc.go
│ ├── float.go
│ └── float_test.go
├── go.mod
├── go.sum
├── golearn.go
├── kdtree/
│ ├── heap.go
│ ├── heap_test.go
│ ├── kdtree.go
│ └── kdtree_test.go
├── knn/
│ ├── euclidean.c
│ ├── knn.go
│ ├── knn.h
│ ├── knn_bench_test.go
│ ├── knn_cov_test.go
│ ├── knn_kdtree_test.go
│ ├── knn_opt_euclidean.go
│ ├── knn_test.go
│ ├── knn_test_1.csv
│ ├── knn_test_2.csv
│ ├── knn_test_2_subset.csv
│ ├── knn_train_1.csv
│ ├── knn_train_2.csv
│ ├── knn_weighted_test.go
│ └── temp.cls
├── linear_models/
│ ├── blas.h
│ ├── blasp.h
│ ├── cfuncs.go
│ ├── daxpy.c
│ ├── ddot.c
│ ├── dnrm2.c
│ ├── doc.go
│ ├── dscal.c
│ ├── liblinear.go
│ ├── liblinear_print.go
│ ├── liblinear_print_11.go
│ ├── linear.cpp
│ ├── linear.h
│ ├── linear_models_test.go
│ ├── linear_regression.go
│ ├── linear_regression_test.go
│ ├── linearsvc.go
│ ├── linearsvc_test.go
│ ├── logistic.go
│ ├── logistic_test.go
│ ├── test.csv
│ ├── tmp
│ ├── train.csv
│ ├── tron.cpp
│ ├── tron.h
│ └── util.go
├── meta/
│ ├── bagging.go
│ ├── bagging_test.go
│ ├── meta.go
│ ├── one_v_all.go
│ └── one_v_all_test.go
├── metrics/
│ └── pairwise/
│ ├── chebyshev.go
│ ├── chebyshev_test.go
│ ├── cosine.go
│ ├── cosine_test.go
│ ├── cranberra.go
│ ├── cranberra_test.go
│ ├── euclidean.go
│ ├── euclidean_test.go
│ ├── manhattan.go
│ ├── manhattan_test.go
│ ├── pairwise.go
│ ├── poly_kernel.go
│ ├── poly_kernel_test.go
│ ├── rbf_kernel.go
│ └── rbf_kernel_test.go
├── naive/
│ ├── bernoulli_nb.go
│ ├── bernoulli_nb_test.go
│ ├── naive.go
│ └── test/
│ ├── simple_test.csv
│ └── simple_train.csv
├── neural/
│ ├── funcs.go
│ ├── layered.go
│ ├── layered_test.go
│ ├── network.go
│ ├── network_test.go
│ ├── neural.go
│ └── xor.csv
├── optimisation/
│ └── optimisation.go
├── pca/
│ ├── pca.go
│ └── pca_test.go
├── perceptron/
│ ├── average.go
│ └── average_test.go
├── trees/
│ ├── benchdata.csv
│ ├── cart_classifier.go
│ ├── cart_regressor.go
│ ├── cart_test.go
│ ├── cart_utils.go
│ ├── entropy.go
│ ├── gini.go
│ ├── gr.go
│ ├── id3.go
│ ├── id3_test.go
│ ├── isolation.go
│ ├── isolation_test.go
│ ├── onerow.csv
│ ├── random.go
│ ├── sorter.go
│ ├── tree_bench_test.go
│ ├── tree_test.go
│ └── trees.go
└── utilities/
└── utilities.go
SYMBOL INDEX (950 symbols across 155 files)
FILE: base/arff.go
function SerializeInstancesToDenseARFF (line 16) | func SerializeInstancesToDenseARFF(inst FixedDataGrid, path, relation st...
function SerializeInstancesToDenseARFFWithAttributes (line 31) | func SerializeInstancesToDenseARFFWithAttributes(inst FixedDataGrid, raw...
function SerializeInstancesToWriterDenseARFFWithAttributes (line 44) | func SerializeInstancesToWriterDenseARFFWithAttributes(w io.Writer, inst...
function ParseARFFGetRows (line 77) | func ParseARFFGetRows(filepath string) (int, error) {
function ParseARFFGetAttributes (line 114) | func ParseARFFGetAttributes(filepath string) []Attribute {
function ParseDenseARFFBuildInstancesFromReader (line 199) | func ParseDenseARFFBuildInstancesFromReader(r io.Reader, attrs []Attribu...
function ParseDenseARFFToInstances (line 253) | func ParseDenseARFFToInstances(filepath string) (ret *DenseInstances, er...
FILE: base/arff_test.go
function TestParseARFFGetRows (line 9) | func TestParseARFFGetRows(t *testing.T) {
function TestParseARFFGetAttributes (line 20) | func TestParseARFFGetAttributes(t *testing.T) {
function TestParseARFF1 (line 45) | func TestParseARFF1(t *testing.T) {
function TestParseARFF2 (line 56) | func TestParseARFF2(t *testing.T) {
function TestSerializeToARFF (line 85) | func TestSerializeToARFF(t *testing.T) {
FILE: base/attributes.go
constant CategoricalType (line 9) | CategoricalType = iota
constant Float64Type (line 11) | Float64Type
constant BinaryType (line 12) | BinaryType
type Attribute (line 16) | type Attribute interface
FILE: base/attributes_test.go
function TestFloatAttributeSysVal (line 8) | func TestFloatAttributeSysVal(t *testing.T) {
function TestCategoricalAttributeVal (line 24) | func TestCategoricalAttributeVal(t *testing.T) {
function TestBinaryAttribute (line 53) | func TestBinaryAttribute(t *testing.T) {
FILE: base/bag.go
type BinaryAttributeGroup (line 10) | type BinaryAttributeGroup struct
method String (line 19) | func (b *BinaryAttributeGroup) String() string {
method RowSizeInBytes (line 25) | func (b *BinaryAttributeGroup) RowSizeInBytes() int {
method Attributes (line 30) | func (b *BinaryAttributeGroup) Attributes() []Attribute {
method AddAttribute (line 39) | func (b *BinaryAttributeGroup) AddAttribute(a Attribute) error {
method Storage (line 47) | func (b *BinaryAttributeGroup) Storage() []byte {
method setStorage (line 55) | func (b *BinaryAttributeGroup) setStorage(a []byte) {
method getByteOffset (line 59) | func (b *BinaryAttributeGroup) getByteOffset(col, row int) int {
method set (line 63) | func (b *BinaryAttributeGroup) set(col, row int, val []byte) {
method get (line 81) | func (b *BinaryAttributeGroup) get(col, row int) []byte {
method appendToRowBuf (line 90) | func (b *BinaryAttributeGroup) appendToRowBuf(row int, buffer *bytes.B...
method resize (line 101) | func (b *BinaryAttributeGroup) resize(add int) {
FILE: base/bag_test.go
function TestBAGSimple (line 10) | func TestBAGSimple(t *testing.T) {
function TestBAG (line 70) | func TestBAG(t *testing.T) {
FILE: base/binary.go
type BinaryAttribute (line 10) | type BinaryAttribute struct
method MarshalJSON (line 15) | func (b *BinaryAttribute) MarshalJSON() ([]byte, error) {
method UnmarshalJSON (line 24) | func (b *BinaryAttribute) UnmarshalJSON(data []byte) error {
method GetName (line 36) | func (b *BinaryAttribute) GetName() string {
method SetName (line 41) | func (b *BinaryAttribute) SetName(name string) {
method GetType (line 46) | func (b *BinaryAttribute) GetType() int {
method GetSysValFromString (line 51) | func (b *BinaryAttribute) GetSysValFromString(userVal string) []byte {
method GetStringFromSysVal (line 64) | func (b *BinaryAttribute) GetStringFromSysVal(val []byte) string {
method Equals (line 72) | func (b *BinaryAttribute) Equals(other Attribute) bool {
method Compatible (line 82) | func (b *BinaryAttribute) Compatible(other Attribute) bool {
method String (line 91) | func (b *BinaryAttribute) String() string {
function NewBinaryAttribute (line 29) | func NewBinaryAttribute(name string) *BinaryAttribute {
FILE: base/categorical.go
type CategoricalAttribute (line 11) | type CategoricalAttribute struct
method MarshalJSON (line 17) | func (Attr *CategoricalAttribute) MarshalJSON() ([]byte, error) {
method UnmarshalJSON (line 28) | func (Attr *CategoricalAttribute) UnmarshalJSON(data []byte) error {
method GetValues (line 49) | func (Attr *CategoricalAttribute) GetValues() []string {
method GetName (line 54) | func (Attr *CategoricalAttribute) GetName() string {
method SetName (line 59) | func (Attr *CategoricalAttribute) SetName(name string) {
method GetType (line 64) | func (Attr *CategoricalAttribute) GetType() int {
method GetSysVal (line 70) | func (Attr *CategoricalAttribute) GetSysVal(userVal string) []byte {
method GetUsrVal (line 82) | func (Attr *CategoricalAttribute) GetUsrVal(sysVal []byte) string {
method GetSysValFromString (line 99) | func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) [...
method String (line 121) | func (Attr *CategoricalAttribute) String() string {
method GetStringFromSysVal (line 131) | func (Attr *CategoricalAttribute) GetStringFromSysVal(rawVal []byte) s...
method Equals (line 144) | func (Attr *CategoricalAttribute) Equals(other Attribute) bool {
method Compatible (line 171) | func (Attr *CategoricalAttribute) Compatible(other Attribute) bool {
function NewCategoricalAttribute (line 41) | func NewCategoricalAttribute() *CategoricalAttribute {
FILE: base/classifier.go
type Classifier (line 8) | type Classifier interface
type BaseClassifier (line 35) | type BaseClassifier struct
type BaseRegressor (line 39) | type BaseRegressor struct
FILE: base/conversion.go
function checkAllAttributesAreFloat (line 9) | func checkAllAttributesAreFloat(attrs []Attribute) error {
function ConvertRowToMat64 (line 22) | func ConvertRowToMat64(attrs []Attribute, f FixedDataGrid, r int) (*mat....
function ConvertAllRowsToMat64 (line 46) | func ConvertAllRowsToMat64(attrs []Attribute, f FixedDataGrid) ([]*mat.D...
FILE: base/csv.go
function ParseCSVGetRowsFromReader (line 14) | func ParseCSVGetRowsFromReader(r io.ReadSeeker) (int, error) {
function ParseCSVEstimateFilePrecisionFromReader (line 32) | func ParseCSVEstimateFilePrecisionFromReader(r io.ReadSeeker) (int, erro...
function ParseCSVGetAttributesFromReader (line 73) | func ParseCSVGetAttributesFromReader(r io.ReadSeeker, hasHeaders bool) [...
function ParseCSVSniffAttributeNamesFromReader (line 84) | func ParseCSVSniffAttributeNamesFromReader(r io.ReadSeeker, hasHeaders b...
function ParseCSVSniffAttributeTypesFromReader (line 111) | func ParseCSVSniffAttributeTypesFromReader(r io.ReadSeeker, hasHeaders b...
function ParseCSVBuildInstancesFromReader (line 159) | func ParseCSVBuildInstancesFromReader(r io.ReadSeeker, attrs []Attribute...
function ParseCSVToInstancesFromReader (line 205) | func ParseCSVToInstancesFromReader(r io.ReadSeeker, hasHeaders bool) (in...
function ParseMatchAttributes (line 239) | func ParseMatchAttributes(attrs, templateAttrs []Attribute) {
function ParseCSVToTemplatedInstancesFromReader (line 253) | func ParseCSVToTemplatedInstancesFromReader(r io.ReadSeeker, hasHeaders ...
function ParseCSVToInstancesWithAttributeGroupsFromReader (line 291) | func ParseCSVToInstancesWithAttributeGroupsFromReader(r io.ReadSeeker, a...
FILE: base/csv_test.go
function TestParseCSVGetRows (line 9) | func TestParseCSVGetRows(t *testing.T) {
function TestParseCSVGetAttributes (line 43) | func TestParseCSVGetAttributes(t *testing.T) {
function TestParseCSVSniffAttributeTypes (line 61) | func TestParseCSVSniffAttributeTypes(t *testing.T) {
function TestParseCSVSniffAttributeNames (line 75) | func TestParseCSVSniffAttributeNames(t *testing.T) {
function TestParseCSVToInstances (line 89) | func TestParseCSVToInstances(t *testing.T) {
FILE: base/data.go
type SortDirection (line 4) | type SortDirection
constant Descending (line 8) | Descending SortDirection = 1
constant Ascending (line 10) | Ascending SortDirection = 2
type DataGrid (line 14) | type DataGrid interface
type FixedDataGrid (line 33) | type FixedDataGrid interface
type UpdatableDataGrid (line 43) | type UpdatableDataGrid interface
FILE: base/dataframe_go.go
function ConvertDataFrameToInstances (line 13) | func ConvertDataFrameToInstances(df *dataframe.DataFrame, classAttrIndex...
FILE: base/dense.go
type DenseInstances (line 12) | type DenseInstances struct
method createAttributeGroup (line 106) | func (inst *DenseInstances) createAttributeGroup(name string, size int) {
method CreateAttributeGroup (line 139) | func (inst *DenseInstances) CreateAttributeGroup(name string, size int...
method AllAttributeGroups (line 157) | func (inst *DenseInstances) AllAttributeGroups() map[string]AttributeG...
method GetAttributeGroup (line 166) | func (inst *DenseInstances) GetAttributeGroup(name string) (AttributeG...
method AddAttribute (line 188) | func (inst *DenseInstances) AddAttribute(a Attribute) AttributeSpec {
method AddAttributeToAttributeGroup (line 236) | func (inst *DenseInstances) AddAttributeToAttributeGroup(newAttribute ...
method GetAttribute (line 263) | func (inst *DenseInstances) GetAttribute(get Attribute) (AttributeSpec...
method AllAttributes (line 279) | func (inst *DenseInstances) AllAttributes() []Attribute {
method AddClassAttribute (line 294) | func (inst *DenseInstances) AddClassAttribute(a Attribute) error {
method RemoveClassAttribute (line 309) | func (inst *DenseInstances) RemoveClassAttribute(a Attribute) error {
method AllClassAttributes (line 325) | func (inst *DenseInstances) AllClassAttributes() []Attribute {
method allClassAttributes (line 333) | func (inst *DenseInstances) allClassAttributes() []Attribute {
method realiseAttributeGroups (line 349) | func (inst *DenseInstances) realiseAttributeGroups() error {
method Extend (line 392) | func (inst *DenseInstances) Extend(rows int) error {
method Set (line 430) | func (inst *DenseInstances) Set(a AttributeSpec, row int, val []byte) {
method Get (line 437) | func (inst *DenseInstances) Get(a AttributeSpec, row int) []byte {
method RowString (line 442) | func (inst *DenseInstances) RowString(row int) string {
method MapOverRows (line 460) | func (inst *DenseInstances) MapOverRows(asv []AttributeSpec, mapFunc f...
method Size (line 480) | func (inst *DenseInstances) Size() (int, int) {
method swapRows (line 485) | func (inst *DenseInstances) swapRows(i, j int) {
method String (line 498) | func (inst *DenseInstances) String() string {
function NewDenseInstances (line 30) | func NewDenseInstances() *DenseInstances {
function copyFixedDataGridStructure (line 47) | func copyFixedDataGridStructure(of FixedDataGrid) (*DenseInstances, []At...
function NewStructuralCopy (line 74) | func NewStructuralCopy(of FixedDataGrid) *DenseInstances {
function NewDenseCopy (line 81) | func NewDenseCopy(of FixedDataGrid) *DenseInstances {
FILE: base/dense_test.go
function TestHighDimensionalInstancesLoad (line 8) | func TestHighDimensionalInstancesLoad(t *testing.T) {
function TestHighDimensionalInstancesLoad2 (line 14) | func TestHighDimensionalInstancesLoad2(t *testing.T) {
FILE: base/domain.go
type Estimator (line 14) | type Estimator interface
type Predictor (line 19) | type Predictor interface
type Model (line 25) | type Model interface
type BaseEstimator (line 29) | type BaseEstimator struct
function SaveEstimatorToGob (line 35) | func SaveEstimatorToGob(path string, e *Estimator) {
FILE: base/error.go
type GoLearnError (line 10) | type GoLearnError struct
method Error (line 25) | func (g *GoLearnError) Error() string {
method attachFormattedStack (line 38) | func (g *GoLearnError) attachFormattedStack() {
function wrapLinesWithTabPrefix (line 16) | func wrapLinesWithTabPrefix(s string) string {
function DescribeError (line 67) | func DescribeError(description string, err error) error {
function WrapError (line 75) | func WrapError(err error) error {
function FormatError (line 82) | func FormatError(err error, format string, args ...interface{}) error {
FILE: base/error_test.go
function TestId3 (line 8) | func TestId3(t *testing.T) {
FILE: base/filewrapper.go
function ParseCSVGetRows (line 8) | func ParseCSVGetRows(filepath string) (int, error) {
function ParseCSVEstimateFilePrecision (line 20) | func ParseCSVEstimateFilePrecision(filepath string) (int, error) {
function ParseCSVGetAttributes (line 33) | func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
function ParseCSVSniffAttributeNames (line 45) | func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []str...
function ParseCSVSniffAttributeTypes (line 59) | func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Att...
function ParseCSVToInstances (line 72) | func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *D...
function ParseCSVToTemplatedInstances (line 85) | func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, temp...
function ParseCSVToInstancesWithAttributeGroups (line 99) | func ParseCSVToInstancesWithAttributeGroups(filepath string, attrGroups,...
FILE: base/filtered.go
type LazilyFilteredInstances (line 13) | type LazilyFilteredInstances struct
method GetAttribute (line 55) | func (l *LazilyFilteredInstances) GetAttribute(target Attribute) (Attr...
method AllAttributes (line 73) | func (l *LazilyFilteredInstances) AllAttributes() []Attribute {
method AddClassAttribute (line 91) | func (l *LazilyFilteredInstances) AddClassAttribute(cls Attribute) err...
method RemoveClassAttribute (line 111) | func (l *LazilyFilteredInstances) RemoveClassAttribute(cls Attribute) ...
method AllClassAttributes (line 130) | func (l *LazilyFilteredInstances) AllClassAttributes() []Attribute {
method transformNewToOldAttribute (line 140) | func (l *LazilyFilteredInstances) transformNewToOldAttribute(as Attrib...
method Get (line 157) | func (l *LazilyFilteredInstances) Get(as AttributeSpec, row int) []byte {
method MapOverRows (line 172) | func (l *LazilyFilteredInstances) MapOverRows(asv []AttributeSpec, map...
method RowString (line 198) | func (l *LazilyFilteredInstances) RowString(row int) string {
method Size (line 219) | func (l *LazilyFilteredInstances) Size() (int, int) {
method String (line 225) | func (l *LazilyFilteredInstances) String() string {
function NewLazilyFilteredInstances (line 24) | func NewLazilyFilteredInstances(src FixedDataGrid, f Filter) *LazilyFilt...
FILE: base/filters.go
type FilteredAttribute (line 5) | type FilteredAttribute struct
type Filter (line 12) | type Filter interface
FILE: base/fixed.go
type FixedAttributeGroup (line 10) | type FixedAttributeGroup struct
method String (line 19) | func (f *FixedAttributeGroup) String() string {
method RowSizeInBytes (line 24) | func (f *FixedAttributeGroup) RowSizeInBytes() int {
method Attributes (line 29) | func (f *FixedAttributeGroup) Attributes() []Attribute {
method AddAttribute (line 39) | func (f *FixedAttributeGroup) AddAttribute(a Attribute) error {
method setStorage (line 45) | func (f *FixedAttributeGroup) setStorage(a []byte) {
method Storage (line 51) | func (f *FixedAttributeGroup) Storage() []byte {
method offset (line 55) | func (f *FixedAttributeGroup) offset(col, row int) int {
method set (line 59) | func (f *FixedAttributeGroup) set(col int, row int, val []byte) {
method get (line 81) | func (f *FixedAttributeGroup) get(col int, row int) []byte {
method appendToRowBuf (line 86) | func (f *FixedAttributeGroup) appendToRowBuf(row int, buffer *bytes.Bu...
method resize (line 96) | func (f *FixedAttributeGroup) resize(add int) {
FILE: base/float.go
type FloatAttribute (line 11) | type FloatAttribute struct
method MarshalJSON (line 18) | func (f *FloatAttribute) MarshalJSON() ([]byte, error) {
method UnmarshalJSON (line 29) | func (f *FloatAttribute) UnmarshalJSON(data []byte) error {
method Compatible (line 50) | func (Attr *FloatAttribute) Compatible(other Attribute) bool {
method Equals (line 59) | func (Attr *FloatAttribute) Equals(other Attribute) bool {
method GetName (line 73) | func (Attr *FloatAttribute) GetName() string {
method SetName (line 78) | func (Attr *FloatAttribute) SetName(name string) {
method GetType (line 83) | func (Attr *FloatAttribute) GetType() int {
method String (line 89) | func (Attr *FloatAttribute) String() string {
method CheckSysValFromString (line 96) | func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) ([]by...
method GetSysValFromString (line 111) | func (Attr *FloatAttribute) GetSysValFromString(rawVal string) []byte {
method GetFloatFromSysVal (line 120) | func (Attr *FloatAttribute) GetFloatFromSysVal(rawVal []byte) float64 {
method GetStringFromSysVal (line 126) | func (Attr *FloatAttribute) GetStringFromSysVal(rawVal []byte) string {
function NewFloatAttribute (line 44) | func NewFloatAttribute(name string) *FloatAttribute {
FILE: base/group.go
type AttributeGroup (line 9) | type AttributeGroup interface
FILE: base/lazy_sort_test.go
function TestLazySortDesc (line 8) | func TestLazySortDesc(t *testing.T) {
function TestLazySortAsc (line 39) | func TestLazySortAsc(t *testing.T) {
FILE: base/logger.go
function SetLogger (line 14) | func SetLogger(logger *log.Logger) {
function SetLoggerOut (line 22) | func SetLoggerOut(out io.Writer) {
function Silent (line 28) | func Silent() {
FILE: base/mat.go
type Mat64Instances (line 9) | type Mat64Instances struct
method GetAttribute (line 32) | func (m *Mat64Instances) GetAttribute(a Attribute) (AttributeSpec, err...
method AllAttributes (line 42) | func (m *Mat64Instances) AllAttributes() []Attribute {
method AddClassAttribute (line 51) | func (m *Mat64Instances) AddClassAttribute(a Attribute) error {
method RemoveClassAttribute (line 62) | func (m *Mat64Instances) RemoveClassAttribute(a Attribute) error {
method AllClassAttributes (line 73) | func (m *Mat64Instances) AllClassAttributes() []Attribute {
method Get (line 85) | func (m *Mat64Instances) Get(as AttributeSpec, row int) []byte {
method MapOverRows (line 91) | func (m *Mat64Instances) MapOverRows(as []AttributeSpec, f func([][]by...
method RowString (line 111) | func (m *Mat64Instances) RowString(row int) string {
method Size (line 116) | func (m *Mat64Instances) Size() (int, int) {
method String (line 121) | func (m *Mat64Instances) String() string {
function InstancesFromMat64 (line 17) | func InstancesFromMat64(rows, cols int, data *mat.Dense) *Mat64Instances {
FILE: base/mat_test.go
function TestInlineMat64Creation (line 9) | func TestInlineMat64Creation(t *testing.T) {
function TestStringWithExceedMaxRow (line 66) | func TestStringWithExceedMaxRow(t *testing.T) {
FILE: base/serialize.go
constant SerializationFormatVersion (line 16) | SerializationFormatVersion = "golearn 1.0"
type FunctionalTarReader (line 21) | type FunctionalTarReader struct
method GetNamedFile (line 35) | func (f *FunctionalTarReader) GetNamedFile(name string) ([]byte, error) {
function NewFunctionalTarReader (line 27) | func NewFunctionalTarReader(regenFunc func() *tar.Reader) *FunctionalTar...
function tarPrefix (line 71) | func tarPrefix(prefix string, suffix string) string {
type ClassifierMetadataV1 (line 80) | type ClassifierMetadataV1 struct
type ClassifierDeserializer (line 94) | type ClassifierDeserializer struct
method Prefix (line 102) | func (c *ClassifierDeserializer) Prefix(prefix string, suffix string) ...
method ReadMetadataAtPrefix (line 110) | func (c *ClassifierDeserializer) ReadMetadataAtPrefix(prefix string) (...
method GetBytesForKey (line 176) | func (c *ClassifierDeserializer) GetBytesForKey(key string) ([]byte, e...
method GetStringForKey (line 180) | func (c *ClassifierDeserializer) GetStringForKey(key string) (string, ...
method GetJSONForKey (line 189) | func (c *ClassifierDeserializer) GetJSONForKey(key string, v interface...
method GetInstancesForKey (line 198) | func (c *ClassifierDeserializer) GetInstancesForKey(key string) (Fixed...
method GetU64ForKey (line 203) | func (c *ClassifierDeserializer) GetU64ForKey(key string) (uint64, err...
method GetAttributeForKey (line 212) | func (c *ClassifierDeserializer) GetAttributeForKey(key string) (Attri...
method GetAttributesForKey (line 225) | func (c *ClassifierDeserializer) GetAttributesForKey(key string) ([]At...
method Close (line 246) | func (c *ClassifierDeserializer) Close() {
function ReadSerializedClassifierStub (line 119) | func ReadSerializedClassifierStub(filePath string) (*ClassifierDeseriali...
type ClassifierSerializer (line 251) | type ClassifierSerializer struct
method Close (line 260) | func (c *ClassifierSerializer) Close() error {
method WriteBytesForKey (line 291) | func (c *ClassifierSerializer) WriteBytesForKey(key string, b []byte) ...
method WriteU64ForKey (line 318) | func (c *ClassifierSerializer) WriteU64ForKey(key string, v uint64) er...
method WriteJSONForKey (line 324) | func (c *ClassifierSerializer) WriteJSONForKey(key string, v interface...
method WriteAttributeForKey (line 336) | func (c *ClassifierSerializer) WriteAttributeForKey(key string, a Attr...
method WriteAttributesForKey (line 345) | func (c *ClassifierSerializer) WriteAttributesForKey(key string, attrs...
method WriteInstancesForKey (line 363) | func (c *ClassifierSerializer) WriteInstancesForKey(key string, g Fixe...
method Prefix (line 369) | func (c *ClassifierSerializer) Prefix(prefix string, suffix string) st...
method WriteMetadataAtPrefix (line 377) | func (c *ClassifierSerializer) WriteMetadataAtPrefix(prefix string, me...
function CreateSerializedClassifierStub (line 383) | func CreateSerializedClassifierStub(filePath string, metadata Classifier...
FILE: base/serialize_attributes.go
function writeAttributesToFilePart (line 9) | func writeAttributesToFilePart(attrs []Attribute, f *tar.Writer, name st...
function MarshalAttribute (line 34) | func MarshalAttribute(a Attribute) (map[string]interface{}, error) {
function SerializeAttribute (line 47) | func SerializeAttribute(attr Attribute) ([]byte, error) {
function DeserializeAttribute (line 56) | func DeserializeAttribute(data []byte) (Attribute, error) {
function DeserializeAttributes (line 93) | func DeserializeAttributes(data []byte) ([]Attribute, error) {
function ReplaceDeserializedAttributeWithVersionFromInstances (line 115) | func ReplaceDeserializedAttributeWithVersionFromInstances(deserialized A...
function ReplaceDeserializedAttributesWithVersionsFromInstances (line 126) | func ReplaceDeserializedAttributesWithVersionsFromInstances(deserialized...
FILE: base/serialize_instances.go
function SerializeInstancesToFile (line 14) | func SerializeInstancesToFile(inst FixedDataGrid, path string) error {
function SerializeInstancesToCSV (line 32) | func SerializeInstancesToCSV(inst FixedDataGrid, path string) error {
function SerializeInstancesToCSVStream (line 46) | func SerializeInstancesToCSVStream(inst FixedDataGrid, f io.Writer) error {
function DeserializeInstancesFromTarReader (line 81) | func DeserializeInstancesFromTarReader(tr *FunctionalTarReader, prefix s...
function DeserializeInstances (line 187) | func DeserializeInstances(f io.ReadSeeker) (ret *DenseInstances, err err...
function SerializeInstances (line 223) | func SerializeInstances(inst FixedDataGrid, f io.Writer) error {
function SerializeInstancesToTarWriter (line 250) | func SerializeInstancesToTarWriter(inst FixedDataGrid, tw *tar.Writer, p...
FILE: base/serialize_test.go
function TestSerializeToCSV (line 13) | func TestSerializeToCSV(t *testing.T) {
function TestCreateAndReadClassifierStub (line 32) | func TestCreateAndReadClassifierStub(t *testing.T) {
function TestSerializeToFile (line 65) | func TestSerializeToFile(t *testing.T) {
FILE: base/sort.go
function sortXorOp (line 8) | func sortXorOp(b []byte) []byte {
type sortSpec (line 15) | type sortSpec struct
function createSortSpec (line 21) | func createSortSpec(inst FixedDataGrid, attrsArg []AttributeSpec) []sort...
function Sort (line 110) | func Sort(inst FixedDataGrid, direction SortDirection, attrs []Attribute...
function LazySort (line 133) | func LazySort(inst FixedDataGrid, direction SortDirection, attrs []Attri...
FILE: base/sort_test.go
function isSortedAsc (line 8) | func isSortedAsc(inst FixedDataGrid, attr AttributeSpec) bool {
function isSortedDesc (line 23) | func isSortedDesc(inst FixedDataGrid, attr AttributeSpec) bool {
function TestSortDesc (line 38) | func TestSortDesc(t *testing.T) {
function TestSortAsc (line 69) | func TestSortAsc(t *testing.T) {
FILE: base/spec.go
type AttributeSpec (line 10) | type AttributeSpec struct
method GetAttribute (line 34) | func (a *AttributeSpec) GetAttribute() Attribute {
method String (line 39) | func (a *AttributeSpec) String() string {
type byPosition (line 16) | type byPosition
method Len (line 18) | func (b byPosition) Len() int {
method Swap (line 21) | func (b byPosition) Swap(i, j int) {
method Less (line 24) | func (b byPosition) Less(i, j int) bool {
FILE: base/util.go
function PackU64ToBytesInline (line 10) | func PackU64ToBytesInline(val uint64, ret []byte) {
function PackFloatToBytesInline (line 23) | func PackFloatToBytesInline(val float64, ret []byte) {
function PackU64ToBytes (line 29) | func PackU64ToBytes(val uint64) []byte {
function UnpackBytesToU64 (line 44) | func UnpackBytesToU64(val []byte) uint64 {
function PackFloatToBytes (line 51) | func PackFloatToBytes(val float64) []byte {
function UnpackBytesToFloat (line 57) | func UnpackBytesToFloat(val []byte) float64 {
function byteSeqEqual (line 62) | func byteSeqEqual(a, b []byte) bool {
FILE: base/util_attributes.go
function NonClassFloatAttributes (line 12) | func NonClassFloatAttributes(d DataGrid) []Attribute {
function NonClassAttributes (line 36) | func NonClassAttributes(d DataGrid) []Attribute {
function ResolveAttributes (line 44) | func ResolveAttributes(d DataGrid, attrs []Attribute) []AttributeSpec {
function ResolveAllAttributes (line 62) | func ResolveAllAttributes(d DataGrid) []AttributeSpec {
function buildAttrSet (line 66) | func buildAttrSet(a []Attribute) map[Attribute]bool {
function AttributeIntersect (line 79) | func AttributeIntersect(a1, a2 []Attribute) []Attribute {
function AttributeIntersectReferences (line 102) | func AttributeIntersectReferences(a1, a2 []Attribute) []Attribute {
function AttributeDifference (line 120) | func AttributeDifference(a1, a2 []Attribute) []Attribute {
function AttributeDifferenceReferences (line 144) | func AttributeDifferenceReferences(a1, a2 []Attribute) []Attribute {
FILE: base/util_instances.go
function GeneratePredictionVector (line 13) | func GeneratePredictionVector(from FixedDataGrid) UpdatableDataGrid {
function CopyDenseInstances (line 27) | func CopyDenseInstances(template *DenseInstances, templateAttrs []Attrib...
function GetClass (line 61) | func GetClass(from DataGrid, row int) string {
function SetClass (line 90) | func SetClass(at UpdatableDataGrid, row int, class string) {
function GetAttributeByName (line 114) | func GetAttributeByName(inst FixedDataGrid, name string) Attribute {
function GetClassDistributionByBinaryFloatValue (line 125) | func GetClassDistributionByBinaryFloatValue(inst FixedDataGrid) []int {
function GetClassDistributionByCategoricalValue (line 158) | func GetClassDistributionByCategoricalValue(inst FixedDataGrid) []int {
function GetClassDistribution (line 188) | func GetClassDistribution(inst FixedDataGrid) map[string]int {
function GetClassDistributionAfterThreshold (line 200) | func GetClassDistributionAfterThreshold(inst FixedDataGrid, at Attribute...
function GetClassDistributionAfterSplit (line 236) | func GetClassDistributionAfterSplit(inst FixedDataGrid, at Attribute) ma...
function DecomposeOnNumericAttributeThreshold (line 269) | func DecomposeOnNumericAttributeThreshold(inst FixedDataGrid, at Attribu...
function DecomposeOnAttributeValues (line 325) | func DecomposeOnAttributeValues(inst FixedDataGrid, at Attribute) map[st...
function InstancesTrainTestSplit (line 376) | func InstancesTrainTestSplit(src FixedDataGrid, prop float64) (FixedData...
function LazyShuffle (line 400) | func LazyShuffle(from FixedDataGrid) FixedDataGrid {
function Shuffle (line 413) | func Shuffle(from FixedDataGrid) FixedDataGrid {
function SampleWithReplacement (line 431) | func SampleWithReplacement(from FixedDataGrid, size int) FixedDataGrid {
function CheckCompatible (line 443) | func CheckCompatible(s1 FixedDataGrid, s2 FixedDataGrid) []Attribute {
function CheckStrictlyCompatible (line 458) | func CheckStrictlyCompatible(s1 FixedDataGrid, s2 FixedDataGrid) bool {
function InstancesAreEqual (line 507) | func InstancesAreEqual(inst, other FixedDataGrid) bool {
FILE: base/util_test.go
function TestClassDistributionAfterSplit (line 8) | func TestClassDistributionAfterSplit(t *testing.T) {
function TestPackAndUnpack (line 25) | func TestPackAndUnpack(t *testing.T) {
function TestPackAndUnpackFloat (line 53) | func TestPackAndUnpackFloat(t *testing.T) {
function TestStrictlyCompatable (line 68) | func TestStrictlyCompatable(t *testing.T) {
function TestCategoricalEquality (line 99) | func TestCategoricalEquality(t *testing.T) {
FILE: base/view.go
type InstancesView (line 10) | type InstancesView struct
method addClassAttrsFromSrc (line 18) | func (v *InstancesView) addClassAttrsFromSrc(src FixedDataGrid) {
method resolveRow (line 35) | func (v *InstancesView) resolveRow(origRow int) int {
method GetAttribute (line 117) | func (v *InstancesView) GetAttribute(a Attribute) (AttributeSpec, erro...
method AllAttributes (line 136) | func (v *InstancesView) AllAttributes() []Attribute {
method AddClassAttribute (line 153) | func (v *InstancesView) AddClassAttribute(a Attribute) error {
method RemoveClassAttribute (line 171) | func (v *InstancesView) RemoveClassAttribute(a Attribute) error {
method AllClassAttributes (line 178) | func (v *InstancesView) AllClassAttributes() []Attribute {
method Get (line 194) | func (v *InstancesView) Get(as AttributeSpec, row int) []byte {
method MapOverRows (line 207) | func (v *InstancesView) MapOverRows(as []AttributeSpec, rowFunc func([...
method Size (line 231) | func (v *InstancesView) Size() (int, int) {
method String (line 250) | func (v *InstancesView) String() string {
method RowString (line 306) | func (v *InstancesView) RowString(row int) string {
function NewInstancesViewFromRows (line 57) | func NewInstancesViewFromRows(src FixedDataGrid, rows map[int]int) *Inst...
function NewInstancesViewFromVisible (line 78) | func NewInstancesViewFromVisible(src FixedDataGrid, rows []int, attrs []...
function NewInstancesViewFromAttrs (line 99) | func NewInstancesViewFromAttrs(src FixedDataGrid, attrs []Attribute) *In...
FILE: base/view_test.go
function TestInstancesViewRows (line 8) | func TestInstancesViewRows(t *testing.T) {
function TestInstancesViewFromVisible (line 33) | func TestInstancesViewFromVisible(t *testing.T) {
function TestInstancesViewAttrs (line 65) | func TestInstancesViewAttrs(t *testing.T) {
FILE: clustering/cluster_extra_test.go
function Test (line 8) | func Test(t *testing.T) {
FILE: clustering/cluster_test.go
function TestClusterEquality (line 8) | func TestClusterEquality(t *testing.T) {
FILE: clustering/clustering.go
type ClusterParameters (line 14) | type ClusterParameters struct
type ClusterMap (line 25) | type ClusterMap
method Invert (line 29) | func (ref ClusterMap) Invert() (map[int]int, error) {
method Equals (line 45) | func (ref ClusterMap) Equals(other ClusterMap) (bool, error) {
FILE: clustering/dbscan.go
type DBSCANParameters (line 12) | type DBSCANParameters struct
function regionQuery (line 25) | func regionQuery(p int, ret *big.Int, dist *mat.Dense, eps float64) *big...
function computePairwiseDistances (line 36) | func computePairwiseDistances(inst base.FixedDataGrid, attrs []base.Attr...
function DBSCAN (line 58) | func DBSCAN(inst base.FixedDataGrid, params DBSCANParameters) (ClusterMa...
function BitCount (line 145) | func BitCount(n *big.Int) int {
FILE: clustering/dbscan_test.go
function TestDBSCANDistanceQuery (line 16) | func TestDBSCANDistanceQuery(t *testing.T) {
function TestDBSCANSynthetic (line 48) | func TestDBSCANSynthetic(t *testing.T) {
function TestDBSCANDistanceMetric (line 75) | func TestDBSCANDistanceMetric(t *testing.T) {
function TestDBSCAN (line 94) | func TestDBSCAN(t *testing.T) {
FILE: clustering/em.go
type ExpectationMaximization (line 19) | type ExpectationMaximization struct
method Fit (line 42) | func (em *ExpectationMaximization) Fit(inst base.FixedDataGrid) error {
method Predict (line 89) | func (em *ExpectationMaximization) Predict(inst base.FixedDataGrid) (C...
type Params (line 27) | type Params struct
function NewExpectationMaximization (line 33) | func NewExpectationMaximization(n_comps int) (*ExpectationMaximization, ...
function expectation (line 122) | func expectation(X *mat.Dense, p Params, n_comps int) mat.Vector {
function maximization (line 128) | func maximization(X *mat.Dense, y mat.Vector, p Params, n_comps int) Par...
function estimateLogProb (line 160) | func estimateLogProb(X *mat.Dense, p Params, n_comps int) mat.Vector {
function shrunkCovariance (line 195) | func shrunkCovariance(X *mat.Dense) *mat.SymDense {
function initMeans (line 213) | func initMeans(X *mat.Dense, n_comps, n_feats int) *mat.Dense {
function initCovariance (line 229) | func initCovariance(n_comps, n_feats int) []*mat.SymDense {
function distance (line 240) | func distance(p Params, p_new Params) float64 {
function vectorMin (line 255) | func vectorMin(v mat.Vector) float64 {
function vectorMax (line 267) | func vectorMax(v mat.Vector) float64 {
function vecToInts (line 279) | func vecToInts(v mat.Vector) []int {
function means (line 289) | func means(X *mat.Dense) []float64 {
function where (line 304) | func where(X *mat.Dense, y mat.Vector, target int) *mat.Dense {
function identity (line 321) | func identity(N int) []float64 {
function symVals (line 336) | func symVals(M int, v float64) []float64 {
FILE: clustering/em_test.go
function TestExpectationMaximization (line 9) | func TestExpectationMaximization(t *testing.T) {
function BenchmarkExpectationMaximizationOneRow (line 85) | func BenchmarkExpectationMaximizationOneRow(b *testing.B) {
FILE: ensemble/multisvc.go
type MultiLinearSVC (line 13) | type MultiLinearSVC struct
method initializeOneVsAllModel (line 43) | func (m *MultiLinearSVC) initializeOneVsAllModel() {
method Fit (line 71) | func (m *MultiLinearSVC) Fit(instances base.FixedDataGrid) error {
method Predict (line 81) | func (m *MultiLinearSVC) Predict(from base.FixedDataGrid) (base.FixedD...
method GetClassifierMetadata (line 85) | func (m *MultiLinearSVC) GetClassifierMetadata() base.ClassifierMetada...
method Save (line 94) | func (m *MultiLinearSVC) Save(filePath string) error {
method SaveWithPrefix (line 108) | func (m *MultiLinearSVC) SaveWithPrefix(serializer *base.ClassifierSer...
method GetMetadata (line 130) | func (m *MultiLinearSVC) GetMetadata() base.ClassifierMetadataV1 {
method Load (line 139) | func (m *MultiLinearSVC) Load(filePath string) error {
method LoadWithPrefix (line 153) | func (m *MultiLinearSVC) LoadWithPrefix(reader *base.ClassifierDeseria...
function NewMultiLinearSVC (line 25) | func NewMultiLinearSVC(loss, penalty string, dual bool, C float64, eps f...
FILE: ensemble/multisvc_test.go
function TestMultiSVMUnweighted (line 11) | func TestMultiSVMUnweighted(t *testing.T) {
function TestMultiSVMWeighted (line 54) | func TestMultiSVMWeighted(t *testing.T) {
function TestMultiSVMSaved (line 101) | func TestMultiSVMSaved(t *testing.T) {
FILE: ensemble/randomforest.go
type RandomForest (line 14) | type RandomForest struct
method Fit (line 35) | func (f *RandomForest) Fit(on base.FixedDataGrid) error {
method Predict (line 56) | func (f *RandomForest) Predict(with base.FixedDataGrid) (base.FixedDat...
method String (line 61) | func (f *RandomForest) String() string {
method GetMetadata (line 65) | func (f *RandomForest) GetMetadata() base.ClassifierMetadataV1 {
method Save (line 74) | func (f *RandomForest) Save(filePath string) error {
method SaveWithPrefix (line 85) | func (f *RandomForest) SaveWithPrefix(writer *base.ClassifierSerialize...
method Load (line 89) | func (f *RandomForest) Load(filePath string) error {
method LoadWithPrefix (line 97) | func (f *RandomForest) LoadWithPrefix(reader *base.ClassifierDeseriali...
function NewRandomForest (line 24) | func NewRandomForest(forestSize int, features int) *RandomForest {
FILE: ensemble/randomforest_test.go
function TestRandomForest (line 15) | func TestRandomForest(t *testing.T) {
function TestRandomForestSerialization (line 60) | func TestRandomForestSerialization(t *testing.T) {
FILE: evaluation/confusion.go
type ConfusionMatrix (line 13) | type ConfusionMatrix
function GetConfusionMatrix (line 17) | func GetConfusionMatrix(ref base.FixedDataGrid, gen base.FixedDataGrid) ...
function GetTruePositives (line 42) | func GetTruePositives(class string, c ConfusionMatrix) float64 {
function GetFalsePositives (line 48) | func GetFalsePositives(class string, c ConfusionMatrix) float64 {
function GetFalseNegatives (line 61) | func GetFalseNegatives(class string, c ConfusionMatrix) float64 {
function GetTrueNegatives (line 74) | func GetTrueNegatives(class string, c ConfusionMatrix) float64 {
function GetPrecision (line 92) | func GetPrecision(class string, c ConfusionMatrix) float64 {
function GetRecall (line 101) | func GetRecall(class string, c ConfusionMatrix) float64 {
function GetF1Score (line 110) | func GetF1Score(class string, c ConfusionMatrix) float64 {
function GetAccuracy (line 118) | func GetAccuracy(c ConfusionMatrix) float64 {
function GetMicroPrecision (line 134) | func GetMicroPrecision(c ConfusionMatrix) float64 {
function GetMacroPrecision (line 146) | func GetMacroPrecision(c ConfusionMatrix) float64 {
function GetMicroRecall (line 156) | func GetMicroRecall(c ConfusionMatrix) float64 {
function GetMacroRecall (line 168) | func GetMacroRecall(c ConfusionMatrix) float64 {
function GetSummary (line 179) | func GetSummary(c ConfusionMatrix) string {
function ShowConfusionMatrix (line 204) | func ShowConfusionMatrix(c ConfusionMatrix) string {
FILE: evaluation/confusion_test.go
function TestMetrics (line 9) | func TestMetrics(t *testing.T) {
FILE: evaluation/cross_fold.go
function GetCrossValidatedMetric (line 10) | func GetCrossValidatedMetric(in []ConfusionMatrix, metric func(Confusion...
function GenerateCrossFoldValidationConfusionMatrices (line 34) | func GenerateCrossFoldValidationConfusionMatrices(data base.FixedDataGri...
FILE: evaluation/cross_fold_test.go
function TestCrossFold (line 10) | func TestCrossFold(t *testing.T) {
FILE: examples/averageperceptron/averageperceptionexample.go
function main (line 11) | func main() {
FILE: examples/crossfold/rf.go
function main (line 14) | func main() {
FILE: examples/instances/instances.go
function main (line 10) | func main() {
FILE: examples/knnclassifier/knnclassifier_iris.go
function main (line 11) | func main() {
FILE: examples/serialization/attributes.go
function main (line 11) | func main() {
FILE: examples/trees/cart/cart.go
function main (line 12) | func main() {
FILE: examples/trees/id3/trees.go
function main (line 16) | func main() {
FILE: examples/trees/isolationForest/isolation_forest.go
function main (line 12) | func main() {
FILE: filters/binary.go
type BinaryConvertFilter (line 16) | type BinaryConvertFilter struct
method AddAttribute (line 35) | func (b *BinaryConvertFilter) AddAttribute(a base.Attribute) error {
method GetAttributesAfterFiltering (line 41) | func (b *BinaryConvertFilter) GetAttributesAfterFiltering() []base.Fil...
method String (line 46) | func (b *BinaryConvertFilter) String() string {
method Transform (line 62) | func (b *BinaryConvertFilter) Transform(a base.Attribute, n base.Attri...
method Train (line 117) | func (b *BinaryConvertFilter) Train() error {
function NewBinaryConvertFilter (line 24) | func NewBinaryConvertFilter() *BinaryConvertFilter {
FILE: filters/binary_test.go
function TestBinaryFilterClassPreservation (line 9) | func TestBinaryFilterClassPreservation(t *testing.T) {
function TestBinaryFilter (line 43) | func TestBinaryFilter(t *testing.T) {
FILE: filters/binning.go
type BinningFilter (line 12) | type BinningFilter struct
method String (line 34) | func (b *BinningFilter) String() string {
method Train (line 40) | func (b *BinningFilter) Train() error {
method Transform (line 76) | func (b *BinningFilter) Transform(a base.Attribute, n base.Attribute, ...
method GetAttributesAfterFiltering (line 101) | func (b *BinningFilter) GetAttributesAfterFiltering() []base.FilteredA...
function NewBinningFilter (line 21) | func NewBinningFilter(d base.FixedDataGrid, bins int) *BinningFilter {
FILE: filters/binning_test.go
function TestBinning (line 9) | func TestBinning(t *testing.T) {
FILE: filters/chimerge.go
type ChiMergeFilter (line 14) | type ChiMergeFilter struct
method Train (line 116) | func (c *ChiMergeFilter) Train() error {
method GetAttributesAfterFiltering (line 147) | func (c *ChiMergeFilter) GetAttributesAfterFiltering() []base.Filtered...
method Transform (line 166) | func (c *ChiMergeFilter) Transform(a base.Attribute, n base.Attribute,...
method String (line 186) | func (c *ChiMergeFilter) String() string {
function NewChiMergeFilter (line 23) | func NewChiMergeFilter(d base.FixedDataGrid, significance float64) *ChiM...
function chiMerge (line 48) | func chiMerge(inst base.FixedDataGrid, attr base.Attribute, sig float64,...
FILE: filters/chimerge_freq.go
type FrequencyTableEntry (line 8) | type FrequencyTableEntry struct
method String (line 13) | func (t *FrequencyTableEntry) String() string {
FILE: filters/chimerge_funcs.go
function ChiMBuildFrequencyTable (line 8) | func ChiMBuildFrequencyTable(attr base.Attribute, inst base.FixedDataGri...
function chiSquaredPdf (line 44) | func chiSquaredPdf(k float64, x float64) float64 {
function chiSquaredPercentile (line 53) | func chiSquaredPercentile(k int, x float64) float64 {
function chiCountClasses (line 87) | func chiCountClasses(entries []*FrequencyTableEntry) map[string]int {
function chiComputeStatistic (line 97) | func chiComputeStatistic(entry1 *FrequencyTableEntry, entry2 *FrequencyT...
function chiMergeMergeZipAdjacent (line 159) | func chiMergeMergeZipAdjacent(freq []*FrequencyTableEntry, minIndex int)...
FILE: filters/chimerge_test.go
function TestChiMergeFrequencyTable (line 11) | func TestChiMergeFrequencyTable(t *testing.T) {
function TestChiSquaredDistribution (line 39) | func TestChiSquaredDistribution(t *testing.T) {
function TestChiMergeDiscretization (line 47) | func TestChiMergeDiscretization(t *testing.T) {
function TestChiMergeFilter (line 96) | func TestChiMergeFilter(t *testing.T) {
FILE: filters/disc.go
type AbstractDiscretizeFilter (line 8) | type AbstractDiscretizeFilter struct
method AddAttribute (line 16) | func (d *AbstractDiscretizeFilter) AddAttribute(a base.Attribute) error {
method GetAttributesAfterFiltering (line 30) | func (d *AbstractDiscretizeFilter) GetAttributesAfterFiltering() []bas...
method getAttributeSpecs (line 45) | func (d *AbstractDiscretizeFilter) getAttributeSpecs() []base.Attribut...
FILE: filters/float.go
type FloatConvertFilter (line 16) | type FloatConvertFilter struct
method AddAttribute (line 35) | func (f *FloatConvertFilter) AddAttribute(a base.Attribute) error {
method GetAttributesAfterFiltering (line 41) | func (f *FloatConvertFilter) GetAttributesAfterFiltering() []base.Filt...
method String (line 46) | func (f *FloatConvertFilter) String() string {
method Transform (line 53) | func (f *FloatConvertFilter) Transform(a base.Attribute, n base.Attrib...
method Train (line 107) | func (f *FloatConvertFilter) Train() error {
function NewFloatConvertFilter (line 24) | func NewFloatConvertFilter() *FloatConvertFilter {
FILE: filters/float_test.go
function TestFloatFilter (line 9) | func TestFloatFilter(t *testing.T) {
FILE: kdtree/heap.go
type heapNode (line 3) | type heapNode struct
type heap (line 9) | type heap struct
method maximum (line 21) | func (h *heap) maximum() heapNode {
method extractMax (line 30) | func (h *heap) extractMax() {
method insert (line 63) | func (h *heap) insert(value []float64, length float64, srcRowNo int) {
method size (line 81) | func (h *heap) size() int {
function newHeap (line 14) | func newHeap() *heap {
FILE: kdtree/heap_test.go
function TestHeap (line 9) | func TestHeap(t *testing.T) {
FILE: kdtree/kdtree.go
type node (line 10) | type node struct
type Tree (line 19) | type Tree struct
method Build (line 42) | func (t *Tree) Build(data [][]float64) error {
method buildHandle (line 72) | func (t *Tree) buildHandle(data []int, featureIndex int) *node {
method Search (line 116) | func (t *Tree) Search(k int, disType pairwise.PairwiseDistanceFunc, ta...
method searchHandle (line 141) | func (t *Tree) searchHandle(k int, disType pairwise.PairwiseDistanceFu...
method searchAllNodes (line 194) | func (t *Tree) searchAllNodes(k int, disType pairwise.PairwiseDistance...
type SortData (line 24) | type SortData struct
method Len (line 30) | func (d SortData) Len() int { return len(d.Data) }
method Less (line 31) | func (d SortData) Less(i, j int) bool {
method Swap (line 34) | func (d SortData) Swap(i, j int) { d.Data[i], d.Data[j] = d.Data[j], d...
function New (line 37) | func New() *Tree {
FILE: kdtree/kdtree_test.go
function TestKdtree (line 10) | func TestKdtree(t *testing.T) {
FILE: knn/euclidean.c
function euclidean_distance (line 9) | void euclidean_distance (
FILE: knn/knn.go
type KNNClassifier (line 24) | type KNNClassifier struct
method Fit (line 46) | func (KNN *KNNClassifier) Fit(trainingData base.FixedDataGrid) error {
method canUseOptimisations (line 51) | func (KNN *KNNClassifier) canUseOptimisations(what base.FixedDataGrid)...
method Predict (line 102) | func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) (base.Fixed...
method String (line 268) | func (KNN *KNNClassifier) String() string {
method vote (line 272) | func (KNN *KNNClassifier) vote(maxmap map[string]int, values []int) st...
method weightedVote (line 300) | func (KNN *KNNClassifier) weightedVote(maxmap map[string]float64, valu...
method GetMetadata (line 329) | func (KNN *KNNClassifier) GetMetadata() base.ClassifierMetadataV1 {
method Save (line 347) | func (KNN *KNNClassifier) Save(filePath string) error {
method SaveWithPrefix (line 357) | func (KNN *KNNClassifier) SaveWithPrefix(writer *base.ClassifierSerial...
method Load (line 367) | func (KNN *KNNClassifier) Load(filePath string) error {
method LoadWithPrefix (line 377) | func (KNN *KNNClassifier) LoadWithPrefix(reader *base.ClassifierDeseri...
function NewKnnClassifier (line 35) | func NewKnnClassifier(distfunc, algorithm string, neighbours int) *KNNCl...
function ReloadKNNClassifier (line 408) | func ReloadKNNClassifier(filePath string) (*KNNClassifier, error) {
type KNNRegressor (line 418) | type KNNRegressor struct
method Fit (line 431) | func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows...
method Predict (line 440) | func (KNN *KNNRegressor) Predict(vector *mat.Dense, K int) float64 {
function NewKnnRegressor (line 425) | func NewKnnRegressor(distfunc string) *KNNRegressor {
FILE: knn/knn.h
type dist (line 6) | struct dist {
type dist (line 14) | struct dist
FILE: knn/knn_bench_test.go
function readMnist (line 11) | func readMnist() (*base.DenseInstances, *base.DenseInstances) {
function BenchmarkKNNWithOpts (line 43) | func BenchmarkKNNWithOpts(b *testing.B) {
function BenchmarkKNNWithNoOpts (line 61) | func BenchmarkKNNWithNoOpts(b *testing.B) {
FILE: knn/knn_cov_test.go
function TestKnnClassifierCov (line 10) | func TestKnnClassifierCov(t *testing.T) {
FILE: knn/knn_kdtree_test.go
function TestKnnClassifierWithoutOptimisationsWithKdtree (line 10) | func TestKnnClassifierWithoutOptimisationsWithKdtree(t *testing.T) {
function TestKnnClassifierWithTemplatedInstances1WithKdtree (line 41) | func TestKnnClassifierWithTemplatedInstances1WithKdtree(t *testing.T) {
function TestKnnClassifierWithTemplatedInstances1SubsetWithKdtree (line 56) | func TestKnnClassifierWithTemplatedInstances1SubsetWithKdtree(t *testing...
function TestKnnClassifierImplementsClassifierWithKdtree (line 71) | func TestKnnClassifierImplementsClassifierWithKdtree(t *testing.T) {
FILE: knn/knn_opt_euclidean.go
type dist (line 12) | type dist
type distanceRecs (line 14) | type distanceRecs
method Len (line 16) | func (d distanceRecs) Len() int { return len(d) }
method Swap (line 17) | func (d distanceRecs) Swap(i, j int) { d[i], d[j] = d[j], d[i] }
method Less (line 18) | func (d distanceRecs) Less(i, j int) bool { return d[i].dist < d[j].di...
method optimisedEuclideanPredict (line 20) | func (KNN *KNNClassifier) optimisedEuclideanPredict(d *base.DenseInstanc...
FILE: knn/knn_test.go
function TestKnnClassifierWithoutOptimisations (line 11) | func TestKnnClassifierWithoutOptimisations(t *testing.T) {
function TestKnnSaveAndReload (line 42) | func TestKnnSaveAndReload(t *testing.T) {
function TestKnnClassifierWithOptimisations (line 72) | func TestKnnClassifierWithOptimisations(t *testing.T) {
function TestKnnClassifierWithTemplatedInstances1 (line 103) | func TestKnnClassifierWithTemplatedInstances1(t *testing.T) {
function TestKnnClassifierWithTemplatedInstances1Subset (line 118) | func TestKnnClassifierWithTemplatedInstances1Subset(t *testing.T) {
function TestKnnClassifierImplementsClassifier (line 133) | func TestKnnClassifierImplementsClassifier(t *testing.T) {
FILE: knn/knn_weighted_test.go
function TestWeightedKnnClassifierWithoutOptimisationsWithKdtree (line 10) | func TestWeightedKnnClassifierWithoutOptimisationsWithKdtree(t *testing....
function TestWeightedKnnClassifierWithTemplatedInstances1WithKdtree (line 42) | func TestWeightedKnnClassifierWithTemplatedInstances1WithKdtree(t *testi...
function TestWeightedKnnClassifierWithTemplatedInstances1SubsetWithKdtree (line 58) | func TestWeightedKnnClassifierWithTemplatedInstances1SubsetWithKdtree(t ...
function TestWeightedKnnClassifierImplementsClassifierWithKdtree (line 74) | func TestWeightedKnnClassifierImplementsClassifierWithKdtree(t *testing....
function TestWeightedKnnClassifierWithoutOptimisations (line 83) | func TestWeightedKnnClassifierWithoutOptimisations(t *testing.T) {
function TestWeightedKnnClassifierWithTemplatedInstances1 (line 115) | func TestWeightedKnnClassifierWithTemplatedInstances1(t *testing.T) {
function TestWeightedKnnClassifierWithTemplatedInstances1Subset (line 131) | func TestWeightedKnnClassifierWithTemplatedInstances1Subset(t *testing.T) {
function TestWeightedKnnClassifierImplementsClassifier (line 147) | func TestWeightedKnnClassifierImplementsClassifier(t *testing.T) {
FILE: linear_models/blas.h
type fcomplex (line 12) | typedef struct { float r, i; } fcomplex;
type dcomplex (line 13) | typedef struct { double r, i; } dcomplex;
type blasbool (line 14) | typedef int blasbool;
FILE: linear_models/daxpy.c
function daxpy_ (line 3) | int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,
FILE: linear_models/ddot.c
function ddot_ (line 3) | double ddot_(int *n, double *sx, int *incx, double *sy, int *incy)
FILE: linear_models/dnrm2.c
function dnrm2_ (line 4) | double dnrm2_(int *n, double *x, int *incx)
FILE: linear_models/dscal.c
function dscal_ (line 3) | int dscal_(int *n, double *sa, double *sx, int *incx)
FILE: linear_models/liblinear.go
type Problem (line 10) | type Problem struct
type Parameter (line 14) | type Parameter struct
type Model (line 18) | type Model struct
constant L2R_LR (line 23) | L2R_LR = C.L2R_LR
constant L2R_L2LOSS_SVC_DUAL (line 24) | L2R_L2LOSS_SVC_DUAL = C.L2R_L2LOSS_SVC_DUAL
constant L2R_L2LOSS_SVC (line 25) | L2R_L2LOSS_SVC = C.L2R_L2LOSS_SVC
constant L2R_L1LOSS_SVC_DUAL (line 26) | L2R_L1LOSS_SVC_DUAL = C.L2R_L1LOSS_SVC_DUAL
constant MCSVM_CS (line 27) | MCSVM_CS = C.MCSVM_CS
constant L1R_L2LOSS_SVC (line 28) | L1R_L2LOSS_SVC = C.L1R_L2LOSS_SVC
constant L1R_LR (line 29) | L1R_LR = C.L1R_LR
constant L2R_LR_DUAL (line 30) | L2R_LR_DUAL = C.L2R_LR_DUAL
function NewParameter (line 33) | func NewParameter(solver_type int, C float64, eps float64) *Parameter {
function NewProblem (line 45) | func NewProblem(X [][]float64, y []float64, bias float64) *Problem {
function Train (line 61) | func Train(prob *Problem, param *Parameter) *Model {
function Export (line 68) | func Export(model *Model, filePath string) error {
function Load (line 76) | func Load(model *Model, filePath string) error {
function Predict (line 84) | func Predict(model *Model, x []float64) float64 {
function convert_vector (line 90) | func convert_vector(x []float64, bias float64) *C.struct_feature_node {
function convert_features (line 116) | func convert_features(X [][]float64, bias float64) **C.struct_feature_no...
FILE: linear_models/liblinear_print.go
function libLinearPrintFunc (line 19) | func libLinearPrintFunc(str *C.char) {
function libLinearHookPrintFunc (line 23) | func libLinearHookPrintFunc() {
FILE: linear_models/liblinear_print_11.go
function libLinearPrintFunc (line 10) | func libLinearPrintFunc(str *C.char) {
function libLinearHookPrintFunc (line 14) | func libLinearHookPrintFunc() {
FILE: linear_models/linear.cpp
function swap (line 10) | static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
function T (line 12) | static inline T min(T x,T y) { return (x<y)?x:y; }
function T (line 15) | static inline T max(T x,T y) { return (x>y)?x:y; }
function clone (line 17) | static inline void clone(T*& dst, S* src, int n)
function print_string_stdout (line 25) | static void print_string_stdout(const char *s)
function info (line 34) | static void info(const char *fmt,...)
function info (line 44) | static void info(const char *fmt,...) {}
class l2r_lr_fun (line 47) | class l2r_lr_fun: public function
class l2r_l2_svc_fun (line 191) | class l2r_l2_svc_fun: public function
class l2r_l2_svr_fun (line 355) | class l2r_l2_svr_fun: public l2r_l2_svc_fun
class Solver_MCSVM_CS (line 456) | class Solver_MCSVM_CS
function compare_double (line 492) | int compare_double(const void *a, const void *b)
function solve_l2r_l1l2_svc (line 778) | static void solve_l2r_l1l2_svc(
function solve_l2r_l1l2_svr (line 996) | static void solve_l2r_l1l2_svr(
function solve_l2r_lr_dual (line 1223) | void solve_l2r_lr_dual(const problem *prob, double *w, double eps, doubl...
function solve_l1r_l2_svc (line 1397) | static void solve_l1r_l2_svc(
function solve_l1r_lr (line 1684) | static void solve_l1r_lr(
function transpose (line 2049) | static void transpose(const problem *prob, feature_node **x_space_ret, p...
function group_classes (line 2106) | static void group_classes(const problem *prob, int *nr_class_ret, int **...
function train_one (line 2181) | static void train_one(const problem *prob, const parameter *param, doubl...
function model (line 2293) | model* train(const problem *prob, const parameter *param)
function cross_validation (line 2430) | void cross_validation(const problem *prob, const parameter *param, int n...
function predict_values (line 2488) | double predict_values(const struct model *model_, const struct feature_n...
function predict (line 2537) | double predict(const model *model_, const feature_node *x)
function predict_probability (line 2545) | double predict_probability(const struct model *model_, const struct feat...
function save_model (line 2587) | int save_model(const char *model_file_name, const struct model *model_)
type model (line 2642) | struct model
function get_nr_feature (line 2753) | int get_nr_feature(const model *model_)
function get_nr_class (line 2758) | int get_nr_class(const model *model_)
function get_labels (line 2763) | void get_labels(const model *model_, int* label)
function free_model_content (line 2770) | void free_model_content(struct model *model_ptr)
function free_and_destroy_model (line 2778) | void free_and_destroy_model(struct model **model_ptr_ptr)
function destroy_param (line 2788) | void destroy_param(parameter* param)
function check_probability_model (line 2823) | int check_probability_model(const struct model *model_)
function set_print_string_function (line 2830) | void set_print_string_function(void (*print_func)(const char*))
FILE: linear_models/linear.h
type feature_node (line 8) | struct feature_node
type problem (line 14) | struct problem
type parameter (line 24) | struct parameter
type model (line 37) | struct model
type model (line 47) | struct model
type problem (line 47) | struct problem
type parameter (line 47) | struct parameter
type problem (line 48) | struct problem
type parameter (line 48) | struct parameter
type model (line 50) | struct model
type feature_node (line 50) | struct feature_node
type model (line 51) | struct model
type feature_node (line 51) | struct feature_node
type model (line 52) | struct model
type feature_node (line 52) | struct feature_node
type model (line 54) | struct model
type model (line 55) | struct model
type model (line 57) | struct model
type model (line 58) | struct model
type model (line 59) | struct model
type model (line 61) | struct model
type model (line 62) | struct model
type parameter (line 63) | struct parameter
type problem (line 65) | struct problem
type parameter (line 65) | struct parameter
type model (line 66) | struct model
FILE: linear_models/linear_models_test.go
function TestLogisticRegression (line 9) | func TestLogisticRegression(t *testing.T) {
FILE: linear_models/linear_regression.go
type LinearRegression (line 18) | type LinearRegression struct
method Fit (line 30) | func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {
method Predict (line 110) | func (lr *LinearRegression) Predict(X base.FixedDataGrid) (base.FixedD...
function NewLinearRegression (line 26) | func NewLinearRegression() *LinearRegression {
FILE: linear_models/linear_regression_test.go
function TestLinearRegression (line 10) | func TestLinearRegression(t *testing.T) {
function BenchmarkLinearRegressionOneRow (line 68) | func BenchmarkLinearRegressionOneRow(b *testing.B) {
FILE: linear_models/linearsvc.go
type LinearSVCParams (line 30) | type LinearSVCParams struct
method Copy (line 40) | func (p *LinearSVCParams) Copy() *LinearSVCParams {
method SetKindFromStrings (line 58) | func (p *LinearSVCParams) SetKindFromStrings(loss, penalty string) err...
method convertToNativeFormat (line 106) | func (p *LinearSVCParams) convertToNativeFormat() *Parameter {
type LinearSVC (line 111) | type LinearSVC struct
method Fit (line 150) | func (lr *LinearSVC) Fit(X base.FixedDataGrid) error {
method Predict (line 187) | func (lr *LinearSVC) Predict(X base.FixedDataGrid) (base.FixedDataGrid...
method GetMetadata (line 217) | func (lr *LinearSVC) GetMetadata() base.ClassifierMetadataV1 {
method Save (line 239) | func (lr *LinearSVC) Save(filePath string) error {
method SaveWithPrefix (line 251) | func (lr *LinearSVC) SaveWithPrefix(writer *base.ClassifierSerializer,...
method Load (line 294) | func (lr *LinearSVC) Load(filePath string) error {
method LoadWithPrefix (line 306) | func (lr *LinearSVC) LoadWithPrefix(reader *base.ClassifierDeserialize...
method String (line 334) | func (lr *LinearSVC) String() string {
function NewLinearSVC (line 126) | func NewLinearSVC(loss, penalty string, dual bool, C float64, eps float6...
function NewLinearSVCFromParams (line 139) | func NewLinearSVCFromParams(params *LinearSVCParams) (*LinearSVC, error) {
FILE: linear_models/linearsvc_test.go
function TestLinearSVC (line 12) | func TestLinearSVC(t *testing.T) {
FILE: linear_models/logistic.go
type LogisticRegression (line 9) | type LogisticRegression struct
method Fit (line 30) | func (lr *LogisticRegression) Fit(X base.FixedDataGrid) error {
method Predict (line 38) | func (lr *LogisticRegression) Predict(X base.FixedDataGrid) (base.Fixe...
method String (line 67) | func (lr *LogisticRegression) String() string {
function NewLogisticRegression (line 14) | func NewLogisticRegression(penalty string, C float64, eps float64) (*Log...
FILE: linear_models/logistic_test.go
function TestLogistic (line 9) | func TestLogistic(t *testing.T) {
FILE: linear_models/tron.cpp
function T (line 8) | static inline T min(T x,T y) { return (x<y)?x:y; }
function T (line 12) | static inline T max(T x,T y) { return (x>y)?x:y; }
function default_print (line 28) | static void default_print(const char *buf)
FILE: linear_models/tron.h
function class (line 4) | class function
function class (line 15) | class TRON
FILE: linear_models/util.go
function generateClassWeightVectorFromDist (line 8) | func generateClassWeightVectorFromDist(X base.FixedDataGrid) []float64 {
function generateClassWeightVectorFromFixed (line 21) | func generateClassWeightVectorFromFixed(X base.FixedDataGrid) []float64 {
function convertInstancesToProblemVec (line 37) | func convertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
function convertInstancesToLabelVec (line 61) | func convertInstancesToLabelVec(X base.FixedDataGrid) []float64 {
FILE: meta/bagging.go
type BaggedModel (line 14) | type BaggedModel struct
method generateTrainingAttrs (line 25) | func (b *BaggedModel) generateTrainingAttrs(model int, from base.Fixed...
method generatePredictionInstances (line 61) | func (b *BaggedModel) generatePredictionInstances(model int, from base...
method generateTrainingInstances (line 69) | func (b *BaggedModel) generateTrainingInstances(model int, from base.F...
method AddModel (line 77) | func (b *BaggedModel) AddModel(m base.Classifier) {
method Fit (line 83) | func (b *BaggedModel) Fit(from base.FixedDataGrid) {
method Predict (line 103) | func (b *BaggedModel) Predict(from base.FixedDataGrid) (base.FixedData...
method String (line 184) | func (b *BaggedModel) String() string {
method GetMetadata (line 193) | func (b *BaggedModel) GetMetadata() base.ClassifierMetadataV1 {
method Save (line 204) | func (b *BaggedModel) Save(filePath string) error {
method Load (line 214) | func (b *BaggedModel) Load(filePath string) error {
method SaveWithPrefix (line 232) | func (b *BaggedModel) SaveWithPrefix(writer *base.ClassifierSerializer...
method LoadWithPrefix (line 292) | func (b *BaggedModel) LoadWithPrefix(reader *base.ClassifierDeserializ...
FILE: meta/bagging_test.go
function BenchmarkBaggingRandomForestFit (line 17) | func BenchmarkBaggingRandomForestFit(t *testing.B) {
function BenchmarkBaggingRandomForestPredict (line 42) | func BenchmarkBaggingRandomForestPredict(t *testing.B) {
function TestBaggedModelRandomForest (line 68) | func TestBaggedModelRandomForest(t *testing.T) {
function TestBaggedModelRandomForestSerialization (line 108) | func TestBaggedModelRandomForestSerialization(t *testing.T) {
FILE: meta/one_v_all.go
type OneVsAllModel (line 12) | type OneVsAllModel struct
method Fit (line 37) | func (m *OneVsAllModel) Fit(using base.FixedDataGrid) {
method Predict (line 95) | func (m *OneVsAllModel) Predict(what base.FixedDataGrid) (base.FixedDa...
method Load (line 132) | func (m *OneVsAllModel) Load(filePath string) error {
method LoadWithPrefix (line 143) | func (m *OneVsAllModel) LoadWithPrefix(reader *base.ClassifierDeserial...
method GetMetadata (line 243) | func (m *OneVsAllModel) GetMetadata() base.ClassifierMetadataV1 {
method Save (line 252) | func (m *OneVsAllModel) Save(filePath string) error {
method SaveWithPrefix (line 260) | func (m *OneVsAllModel) SaveWithPrefix(writer *base.ClassifierSerializ...
method generateAttributes (line 330) | func (m *OneVsAllModel) generateAttributes(from base.FixedDataGrid) ma...
function NewOneVsAllModel (line 23) | func NewOneVsAllModel(f func(string) base.Classifier) *OneVsAllModel {
type oneVsAllFilter (line 352) | type oneVsAllFilter struct
method AddAttribute (line 358) | func (f *oneVsAllFilter) AddAttribute(a base.Attribute) error {
method GetAttributesAfterFiltering (line 362) | func (f *oneVsAllFilter) GetAttributesAfterFiltering() []base.Filtered...
method String (line 372) | func (f *oneVsAllFilter) String() string {
method Transform (line 376) | func (f *oneVsAllFilter) Transform(old, to base.Attribute, seq []byte)...
method Train (line 387) | func (f *oneVsAllFilter) Train() error {
FILE: meta/one_v_all_test.go
function TestOneVsAllModel (line 14) | func TestOneVsAllModel(t *testing.T) {
FILE: metrics/pairwise/chebyshev.go
type Chebyshev (line 10) | type Chebyshev struct
method Distance (line 16) | func (c *Chebyshev) Distance(vectorX *mat.Dense, vectorY *mat.Dense) f...
function NewChebyshev (line 12) | func NewChebyshev() *Chebyshev {
FILE: metrics/pairwise/chebyshev_test.go
function TestChebyshev (line 10) | func TestChebyshev(t *testing.T) {
FILE: metrics/pairwise/cosine.go
type Cosine (line 9) | type Cosine struct
method Dot (line 16) | func (c *Cosine) Dot(vectorX *mat.Dense, vectorY *mat.Dense) float64 {
method Distance (line 26) | func (c *Cosine) Distance(vectorX *mat.Dense, vectorY *mat.Dense) floa...
function NewCosine (line 11) | func NewCosine() *Cosine {
FILE: metrics/pairwise/cosine_test.go
function TestCosine (line 10) | func TestCosine(t *testing.T) {
FILE: metrics/pairwise/cranberra.go
type Cranberra (line 10) | type Cranberra struct
method Distance (line 23) | func (c *Cranberra) Distance(vectorX *mat.Dense, vectorY *mat.Dense) f...
function NewCranberra (line 12) | func NewCranberra() *Cranberra {
function cranberraDistanceStep (line 16) | func cranberraDistanceStep(num float64, denom float64) float64 {
FILE: metrics/pairwise/cranberra_test.go
function TestCranberrra (line 10) | func TestCranberrra(t *testing.T) {
FILE: metrics/pairwise/euclidean.go
type Euclidean (line 9) | type Euclidean struct
method InnerProduct (line 16) | func (e *Euclidean) InnerProduct(vectorX *mat.Dense, vectorY *mat.Dens...
method Distance (line 26) | func (e *Euclidean) Distance(vectorX *mat.Dense, vectorY *mat.Dense) f...
function NewEuclidean (line 11) | func NewEuclidean() *Euclidean {
FILE: metrics/pairwise/euclidean_test.go
function TestEuclidean (line 10) | func TestEuclidean(t *testing.T) {
FILE: metrics/pairwise/manhattan.go
type Manhattan (line 10) | type Manhattan struct
method Distance (line 18) | func (m *Manhattan) Distance(vectorX *mat.Dense, vectorY *mat.Dense) f...
function NewManhattan (line 12) | func NewManhattan() *Manhattan {
FILE: metrics/pairwise/manhattan_test.go
function TestManhattan (line 10) | func TestManhattan(t *testing.T) {
FILE: metrics/pairwise/pairwise.go
type PairwiseDistanceFunc (line 8) | type PairwiseDistanceFunc interface
FILE: metrics/pairwise/poly_kernel.go
type PolyKernel (line 9) | type PolyKernel struct
method InnerProduct (line 20) | func (p *PolyKernel) InnerProduct(vectorX *mat.Dense, vectorY *mat.Den...
method Distance (line 30) | func (p *PolyKernel) Distance(vectorX *mat.Dense, vectorY *mat.Dense) ...
function NewPolyKernel (line 14) | func NewPolyKernel(degree int) *PolyKernel {
FILE: metrics/pairwise/poly_kernel_test.go
function TestPolyKernel (line 10) | func TestPolyKernel(t *testing.T) {
FILE: metrics/pairwise/rbf_kernel.go
type RBFKernel (line 9) | type RBFKernel struct
method InnerProduct (line 20) | func (r *RBFKernel) InnerProduct(vectorX *mat.Dense, vectorY *mat.Dens...
function NewRBFKernel (line 14) | func NewRBFKernel(gamma float64) *RBFKernel {
FILE: metrics/pairwise/rbf_kernel_test.go
function TestRBFKernel (line 10) | func TestRBFKernel(t *testing.T) {
FILE: naive/bernoulli_nb.go
type BernoulliNBClassifier (line 40) | type BernoulliNBClassifier struct
method String (line 59) | func (nb *BernoulliNBClassifier) String() string {
method GetMetadata (line 63) | func (nb *BernoulliNBClassifier) GetMetadata() base.ClassifierMetadata...
method Save (line 72) | func (nb *BernoulliNBClassifier) Save(filePath string) error {
method Load (line 82) | func (nb *BernoulliNBClassifier) Load(filePath string) error {
method LoadWithPrefix (line 91) | func (nb *BernoulliNBClassifier) LoadWithPrefix(reader *base.Classifie...
method SaveWithPrefix (line 138) | func (nb *BernoulliNBClassifier) SaveWithPrefix(writer *base.Classifie...
method Fit (line 189) | func (nb *BernoulliNBClassifier) Fit(X base.FixedDataGrid) error {
method PredictOne (line 277) | func (nb *BernoulliNBClassifier) PredictOne(vector [][]byte) string {
method Predict (line 321) | func (nb *BernoulliNBClassifier) Predict(what base.FixedDataGrid) (bas...
function NewBernoulliNBClassifier (line 179) | func NewBernoulliNBClassifier() *BernoulliNBClassifier {
FILE: naive/bernoulli_nb_test.go
function TestNoFit (line 12) | func TestNoFit(t *testing.T) {
function convertToBinary (line 23) | func convertToBinary(src base.FixedDataGrid) base.FixedDataGrid {
function TestSerialize (line 35) | func TestSerialize(t *testing.T) {
function TestSimple (line 69) | func TestSimple(t *testing.T) {
FILE: neural/layered.go
type MultiLayerNet (line 21) | type MultiLayerNet struct
method String (line 51) | func (m *MultiLayerNet) String() string {
method convertToFloatInsts (line 55) | func (m *MultiLayerNet) convertToFloatInsts(X base.FixedDataGrid) base...
method Predict (line 73) | func (m *MultiLayerNet) Predict(X base.FixedDataGrid) base.FixedDataGr...
method Fit (line 164) | func (m *MultiLayerNet) Fit(X base.FixedDataGrid) {
function NewMultiLayerNet (line 37) | func NewMultiLayerNet(layers []int) *MultiLayerNet {
FILE: neural/layered_test.go
function TestLayerStructureNoHidden (line 10) | func TestLayerStructureNoHidden(t *testing.T) {
function TestLayeredXOR (line 110) | func TestLayeredXOR(t *testing.T) {
function TestLayeredXORInline (line 159) | func TestLayeredXORInline(t *testing.T) {
FILE: neural/network.go
type Network (line 13) | type Network struct
method String (line 46) | func (n *Network) String() string {
method GetWeight (line 68) | func (n *Network) GetWeight(src, target int) float64 {
method SetWeight (line 76) | func (n *Network) SetWeight(src, target int, v float64) {
method SetBias (line 83) | func (n *Network) SetBias(node int, v float64) {
method GetBias (line 92) | func (n *Network) GetBias(node int) float64 {
method Activate (line 107) | func (n *Network) Activate(with *mat.Dense, maxIterations int) {
method UpdateWeights (line 131) | func (n *Network) UpdateWeights(out, err *mat.Dense, learnRate float64) {
method UpdateBias (line 154) | func (n *Network) UpdateBias(err *mat.Dense, learnRate float64) {
method Error (line 175) | func (n *Network) Error(outArg, errArg *mat.Dense, maxIterations int) ...
function NewNetwork (line 28) | func NewNetwork(size int, input int, f NeuralFunction) *Network {
FILE: neural/network_test.go
function TestNetworkWith1Layer (line 9) | func TestNetworkWith1Layer(t *testing.T) {
FILE: neural/neural.go
type ActivationFunction (line 8) | type ActivationFunction
type NeuralFunction (line 12) | type NeuralFunction struct
type LayerFunc (line 19) | type LayerFunc
FILE: pca/pca.go
type PCA (line 8) | type PCA struct
method FitTransform (line 20) | func (pca *PCA) FitTransform(X *mat.Dense) *mat.Dense {
method Fit (line 25) | func (pca *PCA) Fit(X *mat.Dense) *PCA {
method Transform (line 44) | func (pca *PCA) Transform(X *mat.Dense) *mat.Dense {
function NewPCA (line 14) | func NewPCA(num_components int) *PCA {
function mean (line 67) | func mean(matrix *mat.Dense) *mat.Dense {
function matrixSubVector (line 78) | func matrixSubVector(mat, vec *mat.Dense) *mat.Dense {
function compute (line 93) | func compute(X, Y mat.Matrix) *mat.Dense {
FILE: pca/pca_test.go
function TestPCAWithZeroComponents (line 10) | func TestPCAWithZeroComponents(t *testing.T) {
function TestPCAWithNComponents (line 38) | func TestPCAWithNComponents(t *testing.T) {
function TestPCAFitAndTransformSeparately (line 64) | func TestPCAFitAndTransformSeparately(t *testing.T) {
function TestPCAWithNilSVD (line 81) | func TestPCAWithNilSVD(t *testing.T) {
function TestPCAWithLessThanZeroComponents (line 90) | func TestPCAWithLessThanZeroComponents(t *testing.T) {
function TestMatrixAndVectorMismatchDim (line 99) | func TestMatrixAndVectorMismatchDim(t *testing.T) {
function TestPCAComponentBiggerThanFeature (line 108) | func TestPCAComponentBiggerThanFeature(t *testing.T) {
FILE: perceptron/average.go
constant MaxEpochs (line 8) | MaxEpochs = 10
type AveragePerceptron (line 10) | type AveragePerceptron struct
method updateWeights (line 29) | func (p *AveragePerceptron) updateWeights(features []float64, correcti...
method average (line 43) | func (p *AveragePerceptron) average() {
method score (line 54) | func (p *AveragePerceptron) score(datum instance) float64 {
method Fit (line 68) | func (p *AveragePerceptron) Fit(trainingData base.FixedDataGrid) {
method Predict (line 101) | func (p *AveragePerceptron) Predict(what base.FixedDataGrid) base.Fixe...
type instance (line 22) | type instance struct
type instances (line 27) | type instances
function processData (line 142) | func processData(x base.FixedDataGrid) instances {
function NewAveragePerceptron (line 188) | func NewAveragePerceptron(features int, learningRate float64, startingTh...
FILE: perceptron/average_test.go
function TestProcessData (line 11) | func TestProcessData(t *testing.T) {
function TestFit (line 32) | func TestFit(t *testing.T) {
function TestPredict (line 56) | func TestPredict(t *testing.T) {
function TestCreateAveragePerceptron (line 92) | func TestCreateAveragePerceptron(t *testing.T) {
function BenchmarkFit (line 102) | func BenchmarkFit(b *testing.B) {
FILE: trees/cart_classifier.go
constant GINI (line 15) | GINI string = "gini"
constant ENTROPY (line 16) | ENTROPY string = "entropy"
type classifierNode (line 21) | type classifierNode struct
type CARTDecisionTreeClassifier (line 34) | type CARTDecisionTreeClassifier struct
method Fit (line 172) | func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) error {
method String (line 319) | func (tree *CARTDecisionTreeClassifier) String() string {
method Predict (line 375) | func (tree *CARTDecisionTreeClassifier) Predict(X_test base.FixedDataG...
method Evaluate (line 395) | func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGr...
function convertToMap (line 43) | func convertToMap(y []int64, labels []int64) map[int64]int {
function computeGiniImpurityAndModeLabel (line 55) | func computeGiniImpurityAndModeLabel(y []int64, labels []int64) (float64...
function computeEntropyAndModeLabel (line 72) | func computeEntropyAndModeLabel(y []int64, labels []int64) (float64, int...
function calculateClassificationLoss (line 92) | func calculateClassificationLoss(y []int64, labels []int64, criterion st...
function classifierCreateSplit (line 108) | func classifierCreateSplit(data [][]float64, feature int64, y []int64, t...
function NewDecisionTreeClassifier (line 130) | func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels ...
function classifierReOrderData (line 140) | func classifierReOrderData(featureVal []float64, data [][]float64, y []i...
function classifierUpdateSplit (line 158) | func classifierUpdateSplit(left [][]float64, leftY []int64, right [][]fl...
function classifierBestSplit (line 193) | func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float...
function classifierPrintTreeFromNode (line 324) | func classifierPrintTreeFromNode(tree classifierNode, spacing string) st...
function classifierPredictSingle (line 358) | func classifierPredictSingle(tree classifierNode, instance []float64) in...
function classifierPredictFromNode (line 383) | func classifierPredictFromNode(tree classifierNode, test [][]float64) []...
function classifierEvaluateFromNode (line 406) | func classifierEvaluateFromNode(tree classifierNode, xTest [][]float64, ...
function classifierConvertInstancesToLabelVec (line 419) | func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) ([]int64...
FILE: trees/cart_regressor.go
constant MAE (line 15) | MAE string = "mae"
constant MSE (line 16) | MSE string = "mse"
type regressorNode (line 22) | type regressorNode struct
type CARTDecisionTreeRegressor (line 35) | type CARTDecisionTreeRegressor struct
method Fit (line 160) | func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) error {
method String (line 289) | func (tree *CARTDecisionTreeRegressor) String() string {
method Predict (line 347) | func (tree *CARTDecisionTreeRegressor) Predict(X_test base.FixedDataGr...
function average (line 43) | func average(y []float64) float64 {
function meanAbsoluteError (line 53) | func meanAbsoluteError(y []float64, yBar float64) float64 {
function computeMaeImpurityAndAverage (line 63) | func computeMaeImpurityAndAverage(y []float64) (float64, float64) {
function meanSquaredError (line 69) | func meanSquaredError(y []float64, yBar float64) float64 {
function computeMseImpurityAndAverage (line 80) | func computeMseImpurityAndAverage(y []float64) (float64, float64) {
function calculateRegressionLoss (line 85) | func calculateRegressionLoss(y []float64, criterion string) (float64, fl...
function regressorCreateSplit (line 98) | func regressorCreateSplit(data [][]float64, feature int64, y []float64, ...
function NewDecisionTreeRegressor (line 119) | func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDec...
function regressorReOrderData (line 128) | func regressorReOrderData(featureVal []float64, data [][]float64, y []fl...
function regressorUpdateSplit (line 146) | func regressorUpdateSplit(left [][]float64, leftY []float64, right [][]f...
function regressorBestSplit (line 180) | func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64...
function regressorPrintTreeFromNode (line 295) | func regressorPrintTreeFromNode(tree regressorNode, spacing string) stri...
function regressorPredictSingle (line 329) | func regressorPredictSingle(tree regressorNode, instance []float64) floa...
function regressorPredictFromNode (line 355) | func regressorPredictFromNode(tree regressorNode, test [][]float64) []fl...
function regressorConvertInstancesToProblemVec (line 365) | func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]flo...
function regressorConvertInstancesToLabelVec (line 390) | func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) ([]float6...
FILE: trees/cart_test.go
function TestRegressor (line 9) | func TestRegressor(t *testing.T) {
FILE: trees/cart_utils.go
function findUnique (line 8) | func findUnique(data []float64) []float64 {
function getFeature (line 21) | func getFeature(data [][]float64, feature int64) []float64 {
function validate (line 31) | func validate(triedSplits [][]float64, feature int64, threshold float64)...
function convertInstancesToProblemVec (line 43) | func convertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
FILE: trees/entropy.go
type InformationGainRuleGenerator (line 15) | type InformationGainRuleGenerator struct
method GenerateSplitRule (line 23) | func (r *InformationGainRuleGenerator) GenerateSplitRule(f base.FixedD...
method GetSplitRuleFromSelection (line 36) | func (r *InformationGainRuleGenerator) GetSplitRuleFromSelection(consi...
type numericSplitRef (line 84) | type numericSplitRef struct
type splitVec (line 89) | type splitVec
method Len (line 91) | func (a splitVec) Len() int { return len(a) }
method Swap (line 92) | func (a splitVec) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
method Less (line 93) | func (a splitVec) Less(i, j int) bool { return a[i].val < a[j].val }
function getNumericAttributeEntropy (line 95) | func getNumericAttributeEntropy(f base.FixedDataGrid, attr *base.FloatAt...
function getSplitEntropyFast (line 165) | func getSplitEntropyFast(s [2][]int) float64 {
function getSplitEntropy (line 190) | func getSplitEntropy(s map[string]map[string]int) float64 {
function getBaseEntropy (line 213) | func getBaseEntropy(s map[string]int) float64 {
FILE: trees/gini.go
type GiniCoefficientRuleGenerator (line 14) | type GiniCoefficientRuleGenerator struct
method GenerateSplitRule (line 22) | func (g *GiniCoefficientRuleGenerator) GenerateSplitRule(f base.FixedD...
method GetSplitRuleFromSelection (line 35) | func (g *GiniCoefficientRuleGenerator) GetSplitRuleFromSelection(consi...
function computeGini (line 72) | func computeGini(s map[string]int) float64 {
function computeAverageGiniIndex (line 92) | func computeAverageGiniIndex(s map[string]map[string]int) float64 {
FILE: trees/gr.go
type InformationGainRatioRuleGenerator (line 14) | type InformationGainRatioRuleGenerator struct
method GenerateSplitRule (line 22) | func (r *InformationGainRatioRuleGenerator) GenerateSplitRule(f base.F...
method GetSplitRuleFromSelection (line 35) | func (r *InformationGainRatioRuleGenerator) GetSplitRuleFromSelection(...
FILE: trees/id3.go
type NodeType (line 14) | type NodeType
constant LeafNode (line 18) | LeafNode NodeType = 1
constant RuleNode (line 20) | RuleNode NodeType = 2
type RuleGenerator (line 25) | type RuleGenerator interface
type DecisionTreeRule (line 30) | type DecisionTreeRule struct
method MarshalJSON (line 35) | func (d *DecisionTreeRule) MarshalJSON() ([]byte, error) {
method unmarshalJSON (line 56) | func (d *DecisionTreeRule) unmarshalJSON(data []byte) error {
method UnmarshalJSON (line 86) | func (d *DecisionTreeRule) UnmarshalJSON(data []byte) error {
method String (line 92) | func (d *DecisionTreeRule) String() string {
type DecisionTreeNode (line 105) | type DecisionTreeNode struct
method Save (line 120) | func (d *DecisionTreeNode) Save(filePath string) error {
method SaveWithPrefix (line 137) | func (d *DecisionTreeNode) SaveWithPrefix(writer *base.ClassifierSeria...
method Load (line 156) | func (d *DecisionTreeNode) Load(filePath string) error {
method LoadWithPrefix (line 167) | func (d *DecisionTreeNode) LoadWithPrefix(reader *base.ClassifierDeser...
method getNestedString (line 275) | func (d *DecisionTreeNode) getNestedString(level int) string {
method String (line 305) | func (d *DecisionTreeNode) String() string {
method Prune (line 316) | func (d *DecisionTreeNode) Prune(using base.FixedDataGrid) {
method Predict (line 356) | func (d *DecisionTreeNode) Predict(what base.FixedDataGrid) (base.Fixe...
function getClassAttr (line 114) | func getClassAttr(from base.FixedDataGrid) base.Attribute {
function InferID3Tree (line 191) | func InferID3Tree(from base.FixedDataGrid, with RuleGenerator) *Decision...
function computeAccuracy (line 310) | func computeAccuracy(predictions base.FixedDataGrid, from base.FixedData...
type ClassProba (line 414) | type ClassProba struct
type ClassesProba (line 419) | type ClassesProba
method Len (line 421) | func (o ClassesProba) Len() int {
method Swap (line 424) | func (o ClassesProba) Swap(i, j int) {
method Less (line 427) | func (o ClassesProba) Less(i, j int) bool {
type ID3DecisionTree (line 507) | type ID3DecisionTree struct
method PredictProba (line 432) | func (t *ID3DecisionTree) PredictProba(what base.FixedDataGrid) (Class...
method Fit (line 538) | func (t *ID3DecisionTree) Fit(on base.FixedDataGrid) error {
method Predict (line 550) | func (t *ID3DecisionTree) Predict(what base.FixedDataGrid) (base.Fixed...
method String (line 555) | func (t *ID3DecisionTree) String() string {
method GetMetadata (line 559) | func (t *ID3DecisionTree) GetMetadata() base.ClassifierMetadataV1 {
method Save (line 568) | func (t *ID3DecisionTree) Save(filePath string) error {
method SaveWithPrefix (line 577) | func (t *ID3DecisionTree) SaveWithPrefix(writer *base.ClassifierSerial...
method Load (line 581) | func (t *ID3DecisionTree) Load(filePath string) error {
method LoadWithPrefix (line 589) | func (t *ID3DecisionTree) LoadWithPrefix(reader *base.ClassifierDeseri...
function NewID3DecisionTree (line 517) | func NewID3DecisionTree(prune float64) *ID3DecisionTree {
function NewID3DecisionTreeFromRule (line 528) | func NewID3DecisionTreeFromRule(prune float64, rule RuleGenerator) *ID3D...
FILE: trees/id3_test.go
function TestId3 (line 11) | func TestId3(t *testing.T) {
FILE: trees/isolation.go
type IsolationForest (line 10) | type IsolationForest struct
method Fit (line 138) | func (iForest *IsolationForest) Fit(X base.FixedDataGrid) {
method Predict (line 208) | func (iForest *IsolationForest) Predict(X base.FixedDataGrid) []float64 {
function selectFeature (line 18) | func selectFeature(data [][]float64) int64 {
function minMax (line 23) | func minMax(feature int64, data [][]float64) (float64, float64) {
function selectValue (line 42) | func selectValue(min, max float64) float64 {
function splitData (line 53) | func splitData(val float64, feature int64, data [][]float64) ([][]float6...
function checkData (line 66) | func checkData(data [][]float64) bool {
function buildTree (line 78) | func buildTree(data [][]float64, upperNode regressorNode, depth int, max...
function getRandomData (line 120) | func getRandomData(data [][]float64, subSpace int) [][]float64 {
function NewIsolationForest (line 129) | func NewIsolationForest(nTrees int, maxDepth int, subSpace int) Isolatio...
function pathLength (line 156) | func pathLength(tree regressorNode, instance []float64, path float64) fl...
function evaluateInstance (line 182) | func evaluateInstance(instance []float64, forest []regressorNode) []floa...
function cFactor (line 191) | func cFactor(n int) float64 {
function anomalyScore (line 196) | func anomalyScore(instance []float64, forest []regressorNode, n int) flo...
function preprocessData (line 220) | func preprocessData(X base.FixedDataGrid) [][]float64 {
FILE: trees/isolation_test.go
function TestIsolation (line 9) | func TestIsolation(t *testing.T) {
FILE: trees/random.go
type RandomTreeRuleGenerator (line 10) | type RandomTreeRuleGenerator struct
method GenerateSplitRule (line 17) | func (r *RandomTreeRuleGenerator) GenerateSplitRule(f base.FixedDataGr...
type RandomTree (line 51) | type RandomTree struct
method Fit (line 71) | func (rt *RandomTree) Fit(from base.FixedDataGrid) error {
method Predict (line 77) | func (rt *RandomTree) Predict(from base.FixedDataGrid) (base.FixedData...
method String (line 82) | func (rt *RandomTree) String() string {
method Prune (line 88) | func (rt *RandomTree) Prune(with base.FixedDataGrid) {
method Save (line 93) | func (rt *RandomTree) Save(filePath string) error {
method SaveWithPrefix (line 105) | func (rt *RandomTree) SaveWithPrefix(writer *base.ClassifierSerializer...
method Load (line 110) | func (rt *RandomTree) Load(filePath string) error {
method LoadWithPrefix (line 119) | func (rt *RandomTree) LoadWithPrefix(reader *base.ClassifierDeserializ...
method GetMetadata (line 125) | func (rt *RandomTree) GetMetadata() base.ClassifierMetadataV1 {
function NewRandomTree (line 59) | func NewRandomTree(attrs int) *RandomTree {
FILE: trees/sorter.go
type Slice (line 7) | type Slice struct
method Swap (line 12) | func (s Slice) Swap(i, j int) {
function NewSlice (line 17) | func NewSlice(n []float64) *Slice {
FILE: trees/tree_bench_test.go
function BenchmarkRandomForestFit (line 9) | func BenchmarkRandomForestFit(b *testing.B) {
FILE: trees/tree_test.go
function testCanSaveLoadPredictions (line 14) | func testCanSaveLoadPredictions(trainData, testData base.FixedDataGrid) {
function verifyTreeClassification (line 48) | func verifyTreeClassification(trainData, testData base.FixedDataGrid) {
function TestRandomTreeClassificationAfterDiscretisation (line 176) | func TestRandomTreeClassificationAfterDiscretisation(t *testing.T) {
function TestRandomTreeClassificationWithoutDiscretisation (line 194) | func TestRandomTreeClassificationWithoutDiscretisation(t *testing.T) {
function TestPRIVATEgetSplitEntropy (line 205) | func TestPRIVATEgetSplitEntropy(t *testing.T) {
function TestID3Inference (line 221) | func TestID3Inference(t *testing.T) {
function TestPRIVATEgetNumericAttributeEntropy (line 243) | func TestPRIVATEgetNumericAttributeEntropy(t *testing.T) {
function itBuildsTheCorrectDecisionTree (line 258) | func itBuildsTheCorrectDecisionTree(root *DecisionTreeNode) {
FILE: utilities/utilities.go
type sortedIntMap (line 10) | type sortedIntMap struct
method Len (line 15) | func (sm *sortedIntMap) Len() int {
method Less (line 19) | func (sm *sortedIntMap) Less(i, j int) bool {
method Swap (line 23) | func (sm *sortedIntMap) Swap(i, j int) {
function SortIntMap (line 27) | func SortIntMap(m map[int]float64) []int {
function FloatsToMatrix (line 40) | func FloatsToMatrix(floats []float64) *mat.Dense {
function VectorToMatrix (line 44) | func VectorToMatrix(vector mat.Vector) *mat.Dense {
Copy disabled (too large)
Download .json
Condensed preview — 246 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (11,483K chars).
[
{
"path": ".gitignore",
"chars": 128,
"preview": "# Mac\n.DS_Store\n\n# Intellij\n.idea/\n*.iml\n*.iws\n\n# C/C++\n*.o\n*.a\n*.so\n*.dll\n\n# go test coverprofiles\n*.coverprofile\n\n#vim"
},
{
"path": ".travis.yml",
"chars": 681,
"preview": "language: go\ngo:\n - 1.13.x\n - 1.14.x\nenv:\n # Temporary workaround for Go 1.6+\n - GODEBUG=cgocheck=0\nbefore_install:\n - s"
},
{
"path": "Dockerfile",
"chars": 315,
"preview": "FROM\t\talpine\nRUN apk update && apk add make gcc linux-headers git perl musl-dev go\nRUN\t\tgit clone https://gi"
},
{
"path": "LICENSE.md",
"chars": 1087,
"preview": "The MIT License (MIT)\n\nCopyright (c) {{{year}}} {{{fullname}}}\n\nPermission is hereby granted, free of charge, to any per"
},
{
"path": "README.md",
"chars": 3319,
"preview": "GoLearn\n=======\n\n<img src=\"http://talks.golang.org/2013/advconc/gopherhat.jpg\" width=125><br>\n[\n\n// SerializeInstanc"
},
{
"path": "base/arff_test.go",
"chars": 4076,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"io/ioutil\"\n\t\"testing\"\n)\n\nfunc TestParseARFFGetRow"
},
{
"path": "base/attributes.go",
"chars": 1833,
"preview": "package base\n\nimport (\n\t\"encoding/json\"\n)\n\nconst (\n\t// CategoricalType is for Attributes which represent values distinct"
},
{
"path": "base/attributes_test.go",
"chars": 1898,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc TestFloatAttributeSysVal(t *test"
},
{
"path": "base/bag.go",
"chars": 2331,
"preview": "package base\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n)\n\n// BinaryAttributeGroups contain only BinaryAttributes\n// Compact each Attribu"
},
{
"path": "base/bag_test.go",
"chars": 3033,
"preview": "package base\n\nimport (\n\t\"fmt\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"math/rand\"\n\t\"testing\"\n)\n\nfunc TestBAGSimpl"
},
{
"path": "base/binary.go",
"chars": 2096,
"preview": "package base\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"strconv\"\n)\n\n// BinaryAttributes can only represent 1 or 0.\ntype BinaryA"
},
{
"path": "base/categorical.go",
"chars": 5312,
"preview": "package base\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n)\n\n// CategoricalAttribute is an Attribute implementation\n// which stores"
},
{
"path": "base/classifier.go",
"chars": 1204,
"preview": "package base\n\nimport (\n\t\"gonum.org/v1/gonum/mat\"\n)\n\n// Classifier implementations predict categorical class labels.\ntype"
},
{
"path": "base/conversion.go",
"chars": 1625,
"preview": "package base\n\nimport (\n\t\"fmt\"\n\n\t\"gonum.org/v1/gonum/mat\"\n)\n\nfunc checkAllAttributesAreFloat(attrs []Attribute) error {\n\t"
},
{
"path": "base/csv.go",
"chars": 8838,
"preview": "package base\n\nimport (\n\t\"bufio\"\n\t\"encoding/csv\"\n\t\"fmt\"\n\t\"io\"\n\t\"regexp\"\n\t\"runtime\"\n\t\"strings\"\n)\n\n// ParseCSVGetRowsFromRe"
},
{
"path": "base/csv_test.go",
"chars": 4366,
"preview": "package base\n\nimport (\n\t\"testing\"\n\n\t. \"github.com/smartystreets/goconvey/convey\"\n)\n\nfunc TestParseCSVGetRows(t *testing."
},
{
"path": "base/data.go",
"chars": 1776,
"preview": "package base\n\n// SortDirection specifies sorting direction...\ntype SortDirection int\n\nconst (\n\t// Descending says that I"
},
{
"path": "base/dataframe_go.go",
"chars": 1587,
"preview": "package base\n\nimport (\n\t\"fmt\"\n\t\"reflect\"\n\t\"strconv\"\n\n\t\"github.com/rocketlaunchr/dataframe-go\"\n)\n\n// ConvertDataFrameToIn"
},
{
"path": "base/dense.go",
"chars": 13787,
"preview": "package base\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"os\"\n\t\"sync\"\n)\n\n// DenseInstances stores each Attribute value explicitly\n// in a"
},
{
"path": "base/dense_test.go",
"chars": 987,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc TestHighDimensionalInstancesLoad"
},
{
"path": "base/domain.go",
"chars": 993,
"preview": "// Package base provides base interfaces for GoLearn objects to implement.\n// It also provides a raw base for those obje"
},
{
"path": "base/error.go",
"chars": 2045,
"preview": "package base\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"runtime/debug\"\n\t\"strings\"\n)\n\ntype GoLearnError struct {\n\tWrappedError error\n\tCurre"
},
{
"path": "base/error_test.go",
"chars": 473,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc TestId3(t *testing.T) {\n\tConvey("
},
{
"path": "base/filewrapper.go",
"chars": 3224,
"preview": "package base\n\nimport (\n\t\"os\"\n)\n\n// ParseCSVGetRows returns the number of rows in a given file.\nfunc ParseCSVGetRows(file"
},
{
"path": "base/filtered.go",
"chars": 7221,
"preview": "package base\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n)\n\n// Maybe included a TransformedAttribute struct\n// so we can map from ClassAtt"
},
{
"path": "base/filters.go",
"chars": 647,
"preview": "package base\n\n// FilteredAttributes represent a mapping from the output\n// generated by a filter to the original value.\n"
},
{
"path": "base/fixed.go",
"chars": 2508,
"preview": "package base\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n)\n\n// FixedAttributeGroups contain a particular number of rows of\n// a particular"
},
{
"path": "base/float.go",
"chars": 3650,
"preview": "package base\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"strconv\"\n)\n\n// FloatAttribute is an implementation which stores floatin"
},
{
"path": "base/group.go",
"chars": 810,
"preview": "package base\n\nimport (\n\t\"bytes\"\n)\n\n// AttributeGroups store related sequences of system values\n// in memory for the Dens"
},
{
"path": "base/lazy_sort_test.go",
"chars": 2276,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc TestLazySortDesc(t *testing.T) {"
},
{
"path": "base/logger.go",
"chars": 850,
"preview": "package base\n\nimport (\n\t\"io\"\n\t\"log\"\n\t\"os\"\n)\n\n// Logger is the default logger for the entire golearn package. It writes\n/"
},
{
"path": "base/mat.go",
"chars": 3922,
"preview": "package base\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"gonum.org/v1/gonum/mat\"\n)\n\ntype Mat64Instances struct {\n\tattributes []Attribute"
},
{
"path": "base/mat_test.go",
"chars": 2063,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"gonum.org/v1/gonum/mat\"\n\t\"testing\"\n)\n\nfunc TestIn"
},
{
"path": "base/serialize.go",
"chars": 11771,
"preview": "package base\n\nimport (\n\t\"archive/tar\"\n\t\"compress/gzip\"\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"io\"\n\t\"io/ioutil\"\n\t\"log\"\n\t\"os\"\n\t\"reflect"
},
{
"path": "base/serialize_attributes.go",
"chars": 3368,
"preview": "package base\n\nimport (\n\t\"archive/tar\"\n\t\"encoding/json\"\n\t\"fmt\"\n)\n\nfunc writeAttributesToFilePart(attrs []Attribute, f *ta"
},
{
"path": "base/serialize_instances.go",
"chars": 9453,
"preview": "package base\n\nimport (\n\t\"archive/tar\"\n\t\"compress/gzip\"\n\t\"encoding/csv\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\t\"reflect\"\n\t\"runtime\"\n)\n\nfunc "
},
{
"path": "base/serialize_test.go",
"chars": 3962,
"preview": "package base\n\nimport (\n\t\"archive/tar\"\n\t\"compress/gzip\"\n\t\"fmt\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"io\"\n\t\"io/i"
},
{
"path": "base/sort.go",
"chars": 3927,
"preview": "package base\n\nimport (\n\t\"bytes\"\n\t\"encoding/binary\"\n)\n\nfunc sortXorOp(b []byte) []byte {\n\tret := make([]byte, len(b))\n\tco"
},
{
"path": "base/sort_test.go",
"chars": 2805,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc isSortedAsc(inst FixedDataGrid, "
},
{
"path": "base/spec.go",
"chars": 980,
"preview": "package base\n\nimport (\n\t\"fmt\"\n)\n\n// AttributeSpec is a pointer to a particular Attribute\n// within a particular Instance"
},
{
"path": "base/util.go",
"chars": 1956,
"preview": "package base\n\nimport (\n\t\"math\"\n\t\"unsafe\"\n)\n\n// PackU64ToBytesInline fills ret with the byte values of\n// val. Ret must h"
},
{
"path": "base/util_attributes.go",
"chars": 3738,
"preview": "package base\n\nimport (\n\t\"fmt\"\n\t\"sort\"\n)\n\n// This file contains utility functions relating to Attributes and Attribute sp"
},
{
"path": "base/util_instances.go",
"chars": 14486,
"preview": "package base\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n)\n\n// This file contains utility functions relating to efficiently\n// generat"
},
{
"path": "base/util_test.go",
"chars": 3741,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc TestClassDistributionAfterSplit("
},
{
"path": "base/view.go",
"chars": 7817,
"preview": "package base\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n)\n\n// InstancesViews hide or re-order Attributes and rows from\n// a given DataGri"
},
{
"path": "base/view_test.go",
"chars": 4634,
"preview": "package base\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc TestInstancesViewRows(t *testing"
},
{
"path": "clustering/cluster_extra_test.go",
"chars": 2216,
"preview": "package clustering\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc Test(t *testing.T) {\n\tConv"
},
{
"path": "clustering/cluster_test.go",
"chars": 1716,
"preview": "package clustering\n\nimport (\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing\"\n)\n\nfunc TestClusterEquality(t *tes"
},
{
"path": "clustering/clustering.go",
"chars": 3038,
"preview": "/* This package implements clustering algorithms */\npackage clustering\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/sjwhitworth/golear"
},
{
"path": "clustering/dbscan.csv",
"chars": 22551,
"preview": "0.494260967249,1.45106696541\n-1.42808099324,-0.83706376669\n0.338559182384,1.03875870939\n0.119001013781,-1.05397553336\n1."
},
{
"path": "clustering/dbscan.go",
"chars": 4823,
"preview": "package clustering\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/metrics/pairwise\"\n\t"
},
{
"path": "clustering/dbscan_labels.csv",
"chars": 1518,
"preview": "2\n0\n2\n1\n2\n0\n0\n1\n2\n2\n0\n0\n0\n1\n0\n2\n-1\n0\n0\n1\n1\n1\n1\n1\n0\n0\n1\n2\n2\n1\n2\n0\n0\n2\n0\n2\n1\n2\n2\n1\n1\n0\n0\n0\n0\n0\n2\n1\n2\n0\n1\n1\n0\n0\n1\n1\n0\n2\n1\n0"
},
{
"path": "clustering/dbscan_test.go",
"chars": 3401,
"preview": "package clustering\n\nimport (\n\t\"bufio\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/metrics/pa"
},
{
"path": "clustering/em.go",
"chars": 8115,
"preview": "package clustering\n\nimport (\n\t\"errors\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"gonum.org/v1/gonum/mat\"\n\t\"gonum.org/v1/g"
},
{
"path": "clustering/em_test.go",
"chars": 2686,
"preview": "package clustering\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"test"
},
{
"path": "clustering/gaussian_mixture.csv",
"chars": 29681,
"preview": "0.680267254224,-0.0163423512499\n3.80951843864,0.79848348127\n-1.66137239579,-0.577666950966\n-0.257307254106,-0.1555517250"
},
{
"path": "clustering/gaussian_mixture.py",
"chars": 734,
"preview": "import numpy as np\nimport itertools\n\nfrom sklearn import mixture\nfrom sklearn import preprocessing\n\n# Number of samples "
},
{
"path": "clustering/gaussian_mixture_labels.csv",
"chars": 2000,
"preview": "0\n0\n1\n0\n0\n0\n0\n0\n1\n1\n0\n1\n1\n0\n0\n0\n1\n0\n0\n1\n1\n0\n1\n0\n0\n0\n1\n0\n0\n1\n1\n1\n1\n0\n0\n0\n1\n1\n1\n0\n0\n1\n0\n1\n0\n0\n0\n0\n0\n0\n1\n0\n0\n1\n0\n0\n0\n0\n0\n0\n"
},
{
"path": "clustering/gaussian_mixture_single_obs.csv",
"chars": 32,
"preview": "0.680267254224,-0.0163423512499\n"
},
{
"path": "clustering/gen_test.py",
"chars": 1013,
"preview": "#\n# Generate sample data for the DBSCAN test \n# \n# Lifted from http://scikit-learn.org/stable/auto_examples/cluster/plot"
},
{
"path": "clustering/synthetic.csv",
"chars": 20,
"preview": "0,4\n1,4\n2,3\n2,4\n3,1\n"
},
{
"path": "coverage.sh",
"chars": 456,
"preview": "#!/bin/sh\n\nset -e\n\nworkdir=.cover\nprofile=${workdir}/cover.out\nmode=count\n\ngenerate_cover_data() {\n rm -rf ${workdir}"
},
{
"path": "doc/zh_CN/AddingAttributes.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/AttributeSpecifications.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/CSVFiles.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Classification/KNN.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Classification/Regression.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Classification/Trees.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Classification/liblinear.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Contributing.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/CustomDataGrids.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Filtering.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/FloatAttributePrecision.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Home.md",
"chars": 1031,
"preview": "GoLearn\n=======\n\n<img src=\"http://talks.golang.org/2013/advconc/gopherhat.jpg\" width=125><br>\n\n欢迎阅读 GoLearn 中文文档. GoLear"
},
{
"path": "doc/zh_CN/Installation.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_CN/Instances.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/AddingAttributes.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/AttributeSpecifications.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/CSVFiles.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/Classification/KNN.md",
"chars": 557,
"preview": "**參照:\n[`examples/knnclassifier/knnclassifier_iris.go`](https://github.com/sjwhitworth/golearn/blob/master/examples/knncl"
},
{
"path": "doc/zh_TW/Classification/Regression.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/Classification/Trees.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/Classification/liblinear.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/Contributing.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/CustomDataGrids.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/Filtering.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/FloatAttributePrecision.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/zh_TW/Home.md",
"chars": 1202,
"preview": "GoLearn\n=======\n\n<img src=\"http://talks.golang.org/2013/advconc/gopherhat.jpg\" width=125><br>\n[。\n\n## 安裝\n\n### 系統依賴\n* 你需要先"
},
{
"path": "doc/zh_TW/Instances.md",
"chars": 0,
"preview": ""
},
{
"path": "ensemble/ensemble.go",
"chars": 268,
"preview": "//\n//\n//\tEnsemble contains classifiers which combine other classifiers.\n//\n//\tRandomForest:\n//\t\tGenerates ForestSize bag"
},
{
"path": "ensemble/multisvc.go",
"chars": 4904,
"preview": "package ensemble\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/linear_models\"\n\t\"gith"
},
{
"path": "ensemble/multisvc_test.go",
"chars": 2872,
"preview": "package ensemble\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/evaluation\"\n\t. \"githu"
},
{
"path": "ensemble/randomforest.go",
"chars": 2791,
"preview": "package ensemble\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/met"
},
{
"path": "ensemble/randomforest_test.go",
"chars": 3182,
"preview": "package ensemble\n\nimport (\n\t\"testing\"\n\n\t\"io/ioutil\"\n\t\"os\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitwor"
},
{
"path": "evaluation/confusion.go",
"chars": 6642,
"preview": "package evaluation\n\nimport (\n\t\"bytes\"\n\t\"errors\"\n\t\"fmt\"\n\t\"text/tabwriter\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\n// C"
},
{
"path": "evaluation/confusion_test.go",
"chars": 3540,
"preview": "package evaluation\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"test"
},
{
"path": "evaluation/cross_fold.go",
"chars": 2122,
"preview": "package evaluation\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"math/rand\"\n)\n\n// GetCrossValidatedMetric returns t"
},
{
"path": "evaluation/cross_fold_test.go",
"chars": 765,
"preview": "package evaluation\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/knn\"\n\t. \"github.com"
},
{
"path": "examples/averageperceptron/averageperceptionexample.go",
"chars": 828,
"preview": "package main\n\nimport (\n\t\"fmt\"\n\tbase \"github.com/sjwhitworth/golearn/base\"\n\tevaluation \"github.com/sjwhitworth/golearn/ev"
},
{
"path": "examples/crossfold/rf.go",
"chars": 925,
"preview": "// Demonstrates decision tree classification\n\npackage main\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"git"
},
{
"path": "examples/datasets/boston_house_prices.csv",
"chars": 14502,
"preview": "7,208500\r\n6,181500\r\n7,223500\r\n7,140000\r\n8,250000\r\n5,143000\r\n8,307000\r\n7,200000\r\n7,129900\r\n5,118000\r\n5,129500\r\n9,345000\r\n"
},
{
"path": "examples/datasets/c45-numeric.csv",
"chars": 165,
"preview": "Attribute1,Attribute2,Attribute3,Class\nA,70,T,A\nA,90,T,B\nA,85,F,B\nA,95,F,B\nA,70,F,A\nB,90,T,A\nB,78,F,A\nB,65,T,A\nB,75,F,A\n"
},
{
"path": "examples/datasets/chim.csv",
"chars": 455,
"preview": "A,class\n1.3,c1\n1.3,c3\n1.3,c3\n1.3,c3\n1.3,c3\n1.4,c2\n1.8,c1\n1.8,c2\n1.8,c3\n2.4,c1\n2.4,c1\n2.4,c1\n2.4,c1\n2.4,c1\n2.4,c1\n2.4,c3\n"
},
{
"path": "examples/datasets/exam.csv",
"chars": 37,
"preview": "EXAM1,EXAM2,EXAM3,FINAL\n73,80,75,152\n"
},
{
"path": "examples/datasets/exams.csv",
"chars": 350,
"preview": "EXAM1,EXAM2,EXAM3,FINAL\n73,80,75,152\n93,88,93,185\n89,91,90,180\n96,98,100,196\n73,66,70,142\n53,46,55,101\n69,74,77,149\n47,5"
},
{
"path": "examples/datasets/gaussian_outliers.csv",
"chars": 39353,
"preview": "F1,F2\n0.023702107253236473,-0.7203357749463722\n-1.056344272753271,-1.5563880111353698\n1.7366544678265408,-1.022266759478"
},
{
"path": "examples/datasets/house-votes-84.csv",
"chars": 21771,
"preview": "v16,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,party\n1,-1,1,-1,1,1,1,-1,-1,-1,1,-1,1,1,1,-1,republican\n-1,-1,1,-"
},
{
"path": "examples/datasets/iris.arff",
"chars": 7486,
"preview": "% 1. Title: Iris Plants Database\n% \n% 2. Sources:\n% (a) Creator: R.A. Fisher\n% (b) Donor: Michael Marshall (MA"
},
{
"path": "examples/datasets/iris.csv",
"chars": 4550,
"preview": "5.1,3.5,1.4,0.2,Iris-setosa\n4.9,3.0,1.4,0.2,Iris-setosa\n4.7,3.2,1.3,0.2,Iris-setosa\n4.6,3.1,1.5,0.2,Iris-setosa\n5.0,3.6,"
},
{
"path": "examples/datasets/iris_binned.csv",
"chars": 4615,
"preview": "Sepal length,Sepal width,Petal length, Petal width,Species\n5.02,3.5,1.4,0.2,Iris-setosa\n4.66,3,1.4,0.2,Iris-setosa\n4.66,"
},
{
"path": "examples/datasets/iris_headers.csv",
"chars": 4611,
"preview": "Sepal length, Sepal width,Petal length, Petal width, Species\n5.1,3.5,1.4,0.2,Iris-setosa\n4.9,3.0,1.4,0.2,Iris-setosa\n4.7"
},
{
"path": "examples/datasets/iris_headers_subset.csv",
"chars": 3890,
"preview": " Sepal width,Petal length, Petal width, Species\n3.5,1.4,0.2,Iris-setosa\n3,1.4,0.2,Iris-setosa\n3.2,1.3,0.2,Iris-setosa\n3."
},
{
"path": "examples/datasets/iris_sorted_asc.csv",
"chars": 4611,
"preview": "Sepal length, Sepal width,Petal length, Petal width, Species\n4.3,3.0,1.1,0.1,Iris-setosa\n4.4,2.9,1.4,0.2,Iris-setosa\n4.4"
},
{
"path": "examples/datasets/iris_sorted_desc.csv",
"chars": 4611,
"preview": "Sepal length, Sepal width,Petal length, Petal width, Species\n7.9,3.8,6.4,2.0,Iris-virginica\n7.7,3.8,6.7,2.2,Iris-virgini"
},
{
"path": "examples/datasets/mnist_test.csv",
"chars": 919002,
"preview": "label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,pixel11,pixel12,pixel13,pixel14,pixe"
},
{
"path": "examples/datasets/mnist_train.csv",
"chars": 9145009,
"preview": "label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,pixel11,pixel12,pixel13,pixel14,pixe"
},
{
"path": "examples/datasets/randomdata.csv",
"chars": 16319,
"preview": "var1,var2,label\n3.82,2.95,36.94\n5.13,13.09,47.70\n3.99,18.22,36.66\n6.87,10.31,29.10\n6.44,13.14,9.95\n6.68,7.01,10.75\n6.81,"
},
{
"path": "examples/datasets/sources.txt",
"chars": 259,
"preview": "c45-numeric.csv: www.mgt.ncu.edu.tw/~wabble/School/C45.ppt\ntennis.csv: \"Machine Learning\", Tom Mitchell, McGraw-Hill, 19"
},
{
"path": "examples/datasets/tennis.csv",
"chars": 408,
"preview": "outlook,temp,humidity,windy,play\nsunny,hot,high,false,no\nsunny,hot,high,true,no\novercast,hot,high,false,yes\nrainy,mild,h"
},
{
"path": "examples/datasets/titanic.csv",
"chars": 7999,
"preview": "3,1,2,0\r\n1,0,0,1\r\n3,0,2,1\r\n1,0,2,1\r\n3,1,2,0\r\n3,1,1,0\r\n1,1,2,0\r\n3,1,2,0\r\n3,0,2,1\r\n2,0,0,1\r\n3,0,2,1\r\n1,0,2,1\r\n3,1,2,0\r\n3,1"
},
{
"path": "examples/datasets/weather.arff",
"chars": 489,
"preview": "@relation weather\n\n@attribute outlook {sunny, overcast, rainy}\n@attribute temperature real\n@attribute humidity real\n@att"
},
{
"path": "examples/instances/instances.go",
"chars": 2710,
"preview": "package main\n\n// This example program demonstrates Instances\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\n"
},
{
"path": "examples/knnclassifier/knnclassifier_iris.go",
"chars": 923,
"preview": "package main\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/evaluation\"\n\t\"git"
},
{
"path": "examples/serialization/attributes.go",
"chars": 615,
"preview": "// Demonstrates decision tree classification\n\npackage main\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"github.com/sjwhitworth/go"
},
{
"path": "examples/trees/cart/cart.go",
"chars": 2740,
"preview": "// Example of how to use CART trees for both Classification and Regression\n\npackage main\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/"
},
{
"path": "examples/trees/id3/trees.go",
"chars": 3528,
"preview": "// Demonstrates decision tree classification\n\npackage main\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\n\t\"github.com/sjwhitworth/golea"
},
{
"path": "examples/trees/isolationForest/isolation_forest.go",
"chars": 1857,
"preview": "// Example of how to use Isolation Forest for outlier detection\n\npackage main\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/sjwhitworth"
},
{
"path": "filters/binary.go",
"chars": 5054,
"preview": "package filters\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\n// BinaryConvertFilters convert a given DataG"
},
{
"path": "filters/binary_test.csv",
"chars": 130,
"preview": "floatAttr,shouldBe1Binary,shouldBe3Binary,arbitraryClass\n1.0,true,stoicism,hi\n1.0,false,heroism,there\n0.0,false,romantic"
},
{
"path": "filters/binary_test.go",
"chars": 4641,
"preview": "package filters\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing"
},
{
"path": "filters/binning.go",
"chars": 3184,
"preview": "package filters\n\nimport (\n\t\"fmt\"\n\t\"math\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\n// BinningFilter does equal-width bi"
},
{
"path": "filters/binning_test.go",
"chars": 1418,
"preview": "package filters\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing"
},
{
"path": "filters/chimerge.go",
"chars": 4843,
"preview": "package filters\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"math\"\n)\n\n// ChiMergeFilter implements supervis"
},
{
"path": "filters/chimerge_freq.go",
"chars": 296,
"preview": "package filters\n\nimport (\n\t\"fmt\"\n)\n\n// FrequencyTableEntry is a struct holding a value and a map of frequency\ntype Frequ"
},
{
"path": "filters/chimerge_funcs.go",
"chars": 4784,
"preview": "package filters\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"math\"\n)\n\nfunc ChiMBuildFrequencyTable(attr base.Attri"
},
{
"path": "filters/chimerge_test.go",
"chars": 5196,
"preview": "package filters\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"testing\"\n\n\t. \"github.com/smartystreets/goconve"
},
{
"path": "filters/disc.go",
"chars": 1643,
"preview": "package filters\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\ntype AbstractDiscretizeFilter struct {\n\tattrs"
},
{
"path": "filters/float.go",
"chars": 4667,
"preview": "package filters\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\n// FloatConvertFilters convert a given DataGr"
},
{
"path": "filters/float_test.go",
"chars": 4168,
"preview": "package filters\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"testing"
},
{
"path": "go.mod",
"chars": 393,
"preview": "module github.com/sjwhitworth/golearn\n\ngo 1.15\n\nrequire (\n\tgithub.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac\n\tgit"
},
{
"path": "go.sum",
"chars": 46025,
"preview": "bazil.org/fuse v0.0.0-20160811212531-371fbbdaa898/go.mod h1:Xbm+BRKSBEpa4q4hTSxohYNQpsxXPbPry4JJWOB3LB8=\ncloud.google.co"
},
{
"path": "golearn.go",
"chars": 72,
"preview": "//Package golearn is a machine learning library for Go.\npackage golearn\n"
},
{
"path": "kdtree/heap.go",
"chars": 1579,
"preview": "package kdtree\n\ntype heapNode struct {\n\tvalue []float64\n\tlength float64\n\tsrcRowNo int\n}\n\ntype heap struct {\n\ttree ["
},
{
"path": "kdtree/heap_test.go",
"chars": 1026,
"preview": "package kdtree\n\nimport (\n\t\"testing\"\n\n\t. \"github.com/smartystreets/goconvey/convey\"\n)\n\nfunc TestHeap(t *testing.T) {\n\tCon"
},
{
"path": "kdtree/kdtree.go",
"chars": 5365,
"preview": "package kdtree\n\nimport (\n\t\"errors\"\n\t\"github.com/sjwhitworth/golearn/metrics/pairwise\"\n\t\"gonum.org/v1/gonum/mat\"\n\t\"sort\"\n"
},
{
"path": "kdtree/kdtree_test.go",
"chars": 3560,
"preview": "package kdtree\n\nimport (\n\t\"testing\"\n\n\t\"github.com/sjwhitworth/golearn/metrics/pairwise\"\n\t. \"github.com/smartystreets/goc"
},
{
"path": "knn/euclidean.c",
"chars": 969,
"preview": "// #cgo CFLAGS: -Og -march=native -ffast-math\n\n#include <stdio.h>\n#include <string.h>\n#include \"knn.h\"\n\n/* Works out the"
},
{
"path": "knn/knn.go",
"chars": 13361,
"preview": "// Package knn implements a K Nearest Neighbors object, capable of both classification\n// and regression. It accepts dat"
},
{
"path": "knn/knn.h",
"chars": 659,
"preview": "#ifndef _H_FUNCS\n#define _H_FUNCS\n\n#include <stdint.h>\n\nstruct dist {\n float dist;\n uint32_t p;\n};\n\n/* Works out t"
},
{
"path": "knn/knn_bench_test.go",
"chars": 1779,
"preview": "package knn\n\nimport (\n\t\"fmt\"\n\t\"testing\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/evaluat"
},
{
"path": "knn/knn_cov_test.go",
"chars": 2934,
"preview": "package knn\n\nimport (\n\t\"testing\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n)"
},
{
"path": "knn/knn_kdtree_test.go",
"chars": 2360,
"preview": "package knn\n\nimport (\n\t\"testing\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n)"
},
{
"path": "knn/knn_opt_euclidean.go",
"chars": 2346,
"preview": "package knn\n\n// #include \"knn.h\"\nimport \"C\"\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"sort\"\n\t\"unsafe\"\n)\n\ntype d"
},
{
"path": "knn/knn_test.go",
"chars": 4272,
"preview": "package knn\n\nimport (\n\t\"testing\"\n\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/co"
},
{
"path": "knn/knn_test_1.csv",
"chars": 27,
"preview": "1.2,1.2,1.5,blue\n5,5,5,red\n"
},
{
"path": "knn/knn_test_2.csv",
"chars": 73,
"preview": "weather,hours_of_daylight,number_of_customers\ncloudy,10.1,0\nsunny,10.1,0\n"
},
{
"path": "knn/knn_test_2_subset.csv",
"chars": 60,
"preview": "weather,hours_of_daylight,number_of_customers\ncloudy,10.1,0\n"
},
{
"path": "knn/knn_train_1.csv",
"chars": 42,
"preview": "1,1,1,blue\n1,1,1,blue\n3,3,3,red\n6,6,6,red\n"
},
{
"path": "knn/knn_train_2.csv",
"chars": 107,
"preview": "weather,hours_of_daylight,number_of_customers\nsunny,10.5,200\nsunny,10.1,188\ncloudy,10.0,150\ncloudy,8.5,100\n"
},
{
"path": "knn/knn_weighted_test.go",
"chars": 4797,
"preview": "package knn\n\nimport (\n\t\"testing\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n)"
},
{
"path": "linear_models/blas.h",
"chars": 702,
"preview": "/* blas.h -- C header file for BLAS Ver 1.0 */\n/* Jesse Bennett "
},
{
"path": "linear_models/blasp.h",
"chars": 16460,
"preview": "/* blasp.h -- C prototypes for BLAS Ver 1.0 */\n/* Jesse Bennett "
},
{
"path": "linear_models/cfuncs.go",
"chars": 171,
"preview": "// +build go1.2\n\npackage linear_models\n\n/*\n\nvoid libLinearPrintFunc(char *);\n\nvoid golearn_liblinear_print_func_cgo(char"
},
{
"path": "linear_models/daxpy.c",
"chars": 1205,
"preview": "#include \"blas.h\"\n\nint daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,\n int *incy)\n{\n long int "
},
{
"path": "linear_models/ddot.c",
"chars": 1211,
"preview": "#include \"blas.h\"\n\ndouble ddot_(int *n, double *sx, int *incx, double *sy, int *incy)\n{\n long int i, m, nn, iincx, iinc"
},
{
"path": "linear_models/dnrm2.c",
"chars": 1306,
"preview": "#include <math.h> /* Needed for fabs() and sqrt() */\n#include \"blas.h\"\n\ndouble dnrm2_(int *n, double *x, int *incx)\n{\n "
},
{
"path": "linear_models/doc.go",
"chars": 100,
"preview": "/*\nPackage linear_models implements linear\nand logistic regression models.\n*/\npackage linear_models\n"
},
{
"path": "linear_models/dscal.c",
"chars": 1035,
"preview": "#include \"blas.h\"\n\nint dscal_(int *n, double *sa, double *sx, int *incx)\n{\n long int i, m, nincx, nn, iincx;\n double s"
},
{
"path": "linear_models/liblinear.go",
"chars": 3409,
"preview": "package linear_models\n\n/*\n#include \"linear.h\"\n*/\nimport \"C\"\nimport \"fmt\"\nimport \"unsafe\"\n\ntype Problem struct {\n\tc_prob "
},
{
"path": "linear_models/liblinear_print.go",
"chars": 462,
"preview": "// +build go1.2\n\npackage linear_models\n\n/*\n#include \"linear.h\"\n\ntypedef void (*print_func)(char *);\nvoid golearn_libline"
},
{
"path": "linear_models/liblinear_print_11.go",
"chars": 214,
"preview": "// +build go1.1\n// +build !go1.2\n// +build !go1.3\n\npackage linear_models\n\nimport \"C\"\n\n//export libLinearPrintFunc\nfunc l"
},
{
"path": "linear_models/linear.cpp",
"chars": 56067,
"preview": "#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <stdarg.h>\n#include <locale.h>\n#in"
},
{
"path": "linear_models/linear.h",
"chars": 2008,
"preview": "#ifndef _LIBLINEAR_H\n#define _LIBLINEAR_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\nstruct feature_node\n{\n\tint index;\n\tdo"
},
{
"path": "linear_models/linear_models_test.go",
"chars": 976,
"preview": "package linear_models\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"t"
},
{
"path": "linear_models/linear_regression.go",
"chars": 3360,
"preview": "package linear_models\n\nimport (\n\t\"errors\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\n\t\"fmt\"\n\t_ \"github.com/gonum/blas\"\n\t\"g"
},
{
"path": "linear_models/linear_regression_test.go",
"chars": 2142,
"preview": "package linear_models\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"s"
},
{
"path": "linear_models/linearsvc.go",
"chars": 8559,
"preview": "package linear_models\n\nimport \"C\"\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"io/ioutil\"\n"
},
{
"path": "linear_models/linearsvc_test.go",
"chars": 2341,
"preview": "package linear_models\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t//\"github.com/sjwhitworth/golearn/filters\"\n\t. \"g"
},
{
"path": "linear_models/logistic.go",
"chars": 1824,
"preview": "package linear_models\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\ntype LogisticRegression struc"
},
{
"path": "linear_models/logistic_test.go",
"chars": 1046,
"preview": "package linear_models\n\nimport (\n\t\"github.com/sjwhitworth/golearn/base\"\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"t"
},
{
"path": "linear_models/test.csv",
"chars": 40,
"preview": "1.0,1.0,0.0,0.0,1.0\n0.0,0.0,1.0,1.0,-1.0"
},
{
"path": "linear_models/train.csv",
"chars": 97,
"preview": "0.0, 0.0, 0.0, 1.0, -1.0\n0.0, 0.0, 1.0, 0.0, -1.0\n0.0, 1.0, 0.0, 0.0, 1.0\n1.0, 0.0, 0.0, 0.0, 1.0"
},
{
"path": "linear_models/tron.cpp",
"chars": 5186,
"preview": "#include <math.h>\n#include <stdio.h>\n#include <string.h>\n#include <stdarg.h>\n#include \"tron.h\"\n\n#ifndef min\ntemplate <cl"
},
{
"path": "linear_models/tron.h",
"chars": 687,
"preview": "#ifndef _TRON_H\n#define _TRON_H\n\nclass function\n{\npublic:\n\tvirtual double fun(double *w) = 0 ;\n\tvirtual void grad(double"
},
{
"path": "linear_models/util.go",
"chars": 2253,
"preview": "package linear_models\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\nfunc generateClassWeightVectorFromDist("
},
{
"path": "meta/bagging.go",
"chars": 9996,
"preview": "package meta\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"math/rand\"\n\t\"runtime\"\n\t\"strings\"\n\t\"sync\"\n)\n\n// Ba"
},
{
"path": "meta/bagging_test.go",
"chars": 4523,
"preview": "package meta\n\nimport (\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/"
},
{
"path": "meta/meta.go",
"chars": 323,
"preview": "/*\n\n\tMeta contains base.Classifier implementations which\n\t\tcombine the outputs of others defined elsewhere.\n\n\tBagging:\n\t"
},
{
"path": "meta/one_v_all.go",
"chars": 11627,
"preview": "package meta\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n)\n\n// OneVsAllModel replaces class Attributes with "
},
{
"path": "meta/one_v_all_test.go",
"chars": 1855,
"preview": "package meta\n\nimport (\n\t\"fmt\"\n\t\"github.com/sjwhitworth/golearn/base\"\n\t\"github.com/sjwhitworth/golearn/evaluation\"\n\t\"gith"
},
{
"path": "metrics/pairwise/chebyshev.go",
"chars": 526,
"preview": "package pairwise\n\nimport (\n\t\"math\"\n\n\t\"github.com/gonum/matrix\"\n\t\"gonum.org/v1/gonum/mat\"\n)\n\ntype Chebyshev struct{}\n\nfun"
},
{
"path": "metrics/pairwise/chebyshev_test.go",
"chars": 1023,
"preview": "package pairwise\n\nimport (\n\t\"testing\"\n\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"gonum.org/v1/gonum/mat\"\n)\n\nfunc T"
},
{
"path": "metrics/pairwise/cosine.go",
"chars": 747,
"preview": "package pairwise\n\nimport (\n\t\"math\"\n\n\t\"gonum.org/v1/gonum/mat\"\n)\n\ntype Cosine struct{}\n\nfunc NewCosine() *Cosine {\n\tretur"
},
{
"path": "metrics/pairwise/cosine_test.go",
"chars": 712,
"preview": "package pairwise\n\nimport (\n\t\"testing\"\n\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"gonum.org/v1/gonum/mat\"\n)\n\nfunc T"
},
{
"path": "metrics/pairwise/cranberra.go",
"chars": 750,
"preview": "package pairwise\n\nimport (\n\t\"math\"\n\n\t\"github.com/gonum/matrix\"\n\t\"gonum.org/v1/gonum/mat\"\n)\n\ntype Cranberra struct{}\n\nfun"
},
{
"path": "metrics/pairwise/cranberra_test.go",
"chars": 1290,
"preview": "package pairwise\n\nimport (\n\t\"testing\"\n\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"gonum.org/v1/gonum/mat\"\n)\n\nfunc T"
},
{
"path": "metrics/pairwise/euclidean.go",
"chars": 740,
"preview": "package pairwise\n\nimport (\n\t\"math\"\n\n\t\"gonum.org/v1/gonum/mat\"\n)\n\ntype Euclidean struct{}\n\nfunc NewEuclidean() *Euclidean"
},
{
"path": "metrics/pairwise/euclidean_test.go",
"chars": 740,
"preview": "package pairwise\n\nimport (\n\t\"testing\"\n\n\t. \"github.com/smartystreets/goconvey/convey\"\n\t\"gonum.org/v1/gonum/mat\"\n)\n\nfunc T"
}
]
// ... and 46 more files (download for full content)
About this extraction
This page contains the full source code of the sjwhitworth/golearn GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 246 files (68.5 MB), approximately 2.9M tokens, and a symbol index with 950 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.