Full Code of etsy/Conjecture for AI

master a32d61966b12 cached
117 files
449.7 KB
119.2k tokens
966 symbols
1 requests
Download .txt
Showing preview only (488K chars total). Download the full file or copy to clipboard to get everything.
Repository: etsy/Conjecture
Branch: master
Commit: a32d61966b12
Files: 117
Total size: 449.7 KB

Directory structure:
gitextract__actrptd/

├── .gitignore
├── .travis.yml
├── LICENSE.md
├── README.md
├── bin/
│   ├── demo.sh
│   ├── model_diff.py
│   ├── model_param.py
│   └── prediction_inspection.py
├── build.sbt
├── clients/
│   └── phplib/
│       └── Conjecture/
│           ├── BinaryClassifier.php
│           ├── Config.php
│           ├── ConjectureException.php
│           ├── Finder.php
│           ├── Instance.php
│           ├── MulticlassClassifier.php
│           ├── MulticlassLogisticRegressionClassifier.php
│           ├── MulticlassOneVsAllClassifier.php
│           ├── Text.php
│           ├── TextSequence.php
│           └── Vector.php
├── data/
│   └── iris.tsv
├── project/
│   ├── build.properties
│   └── plugins.sbt
├── sbt
└── src/
    ├── main/
    │   ├── java/
    │   │   └── com/
    │   │       └── etsy/
    │   │           └── conjecture/
    │   │               ├── GenericPair.java
    │   │               ├── PrimitivePair.java
    │   │               ├── Utilities.java
    │   │               ├── data/
    │   │               │   ├── AbstractInstance.java
    │   │               │   ├── BinaryLabel.java
    │   │               │   ├── BinaryLabeledInstance.java
    │   │               │   ├── ByteArrayDoubleHashMap.java
    │   │               │   ├── ClusterLabel.java
    │   │               │   ├── ClusterPrediction.java
    │   │               │   ├── Instance.java
    │   │               │   ├── InstanceFactory.java
    │   │               │   ├── InstanceInterface.java
    │   │               │   ├── Label.java
    │   │               │   ├── LabeledInstance.java
    │   │               │   ├── LazyVector.java
    │   │               │   ├── MulticlassLabel.java
    │   │               │   ├── MulticlassLabeledInstance.java
    │   │               │   ├── MulticlassPrediction.java
    │   │               │   ├── RealValueLabeledInstance.java
    │   │               │   ├── RealValuedLabel.java
    │   │               │   ├── Recommendation.java
    │   │               │   └── StringKeyedVector.java
    │   │               ├── evaluation/
    │   │               │   ├── BinaryModelEvaluation.java
    │   │               │   ├── ConfusionMatrix.java
    │   │               │   ├── EvaluationAggregator.java
    │   │               │   ├── ModelEvaluation.java
    │   │               │   ├── MulticlassConfusionMatrix.java
    │   │               │   ├── MulticlassModelEvaluation.java
    │   │               │   ├── MulticlassReceiverOperatingCharacteristic.java
    │   │               │   ├── ReceiverOperatingCharacteristic.java
    │   │               │   └── RegressionModelEvaluation.java
    │   │               ├── model/
    │   │               │   ├── AdagradOptimizer.java
    │   │               │   ├── ClusteringModel.java
    │   │               │   ├── ControlOptimizer.java
    │   │               │   ├── Decomposable.java
    │   │               │   ├── ElasticNetOptimizer.java
    │   │               │   ├── FTRLOptimizer.java
    │   │               │   ├── Hinge.java
    │   │               │   ├── KMeans.java
    │   │               │   ├── LeastSquaresRegressionModel.java
    │   │               │   ├── LogisticRegression.java
    │   │               │   ├── MIRA.java
    │   │               │   ├── MIRAOptimizer.java
    │   │               │   ├── Model.java
    │   │               │   ├── PassiveAggressiveOptimizer.java
    │   │               │   ├── SGDOptimizer.java
    │   │               │   ├── UpdateableLinearModel.java
    │   │               │   ├── UpdateableModel.java
    │   │               │   └── UpdateableMulticlassLinearModel.java
    │   │               └── topics/
    │   │                   └── lda/
    │   │                       ├── LDADenseTopics.java
    │   │                       ├── LDADict.java
    │   │                       ├── LDADoc.java
    │   │                       ├── LDAPartialSparseTopics.java
    │   │                       ├── LDAPartialTopics.java
    │   │                       ├── LDARandomTopics.java
    │   │                       ├── LDASparseTopics.java
    │   │                       ├── LDATopics.java
    │   │                       └── LDAUtils.java
    │   └── scala/
    │       └── com/
    │           └── etsy/
    │               ├── conjecture/
    │               │   ├── VWReader.scala
    │               │   ├── demo/
    │               │   │   ├── DemoLinearHyperparameterSearch.scala
    │               │   │   ├── IrisDataToMulticlassLabeledInstances.scala
    │               │   │   └── LearnMulticlassClassifier.scala
    │               │   ├── scalding/
    │               │   │   ├── ALSJob.scala
    │               │   │   ├── FastKNN.scala
    │               │   │   ├── LSH.scala
    │               │   │   ├── NNMF.scala
    │               │   │   ├── SVD.scala
    │               │   │   ├── evaluate/
    │               │   │   │   ├── GenericCrossValidator.scala
    │               │   │   │   └── GenericEvaluator.scala
    │               │   │   ├── factorize/
    │               │   │   │   └── FactorizationTools.scala
    │               │   │   ├── train/
    │               │   │   │   ├── AbstractModelTrainer.scala
    │               │   │   │   ├── BinaryModelTrainer.scala
    │               │   │   │   ├── ClusteringModelTrainer.scala
    │               │   │   │   ├── LargeModelTrainer.scala
    │               │   │   │   ├── ModelTrainerStrategy.scala
    │               │   │   │   ├── MulticlassModelTrainer.scala
    │               │   │   │   ├── RegressionModelTrainer.scala
    │               │   │   │   └── SmallModelTrainer.scala
    │               │   │   └── util/
    │               │   │       ├── BaseGridSearcher.scala
    │               │   │       ├── DynamicOptions.scala
    │               │   │       └── HyperparameterSearcher.scala
    │               │   └── text/
    │               │       ├── FeatureHelper.scala
    │               │       ├── Text.scala
    │               │       └── TextSequence.scala
    │               └── scalding/
    │                   └── jobs/
    │                       └── conjecture/
    │                           ├── AdHocClassifier.scala
    │                           ├── AdHocClusterer.scala
    │                           ├── AdHocMulticlassClassifier.scala
    │                           ├── AdHocPredictor.scala
    │                           └── NNMFTest.scala
    └── test/
        └── java/
            └── com/
                └── etsy/
                    └── conjecture/
                        ├── data/
                        │   ├── LazyVectorTest.java
                        │   └── StringKeyedVectorTest.java
                        ├── evaluation/
                        │   └── TestReceiverOperatingCharacteristic.java
                        └── model/
                            └── UpdateableLinearModelTest.java

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.class
*.log
*.swp
*.swo

# sbt specific
dist/*
target/
lib_managed/
src_managed/
project/boot/
project/plugins/project/

# Scala-IDE specific
.scala_dependencies

#java

*.class

# Package Files #
*.jar
*.war
*.ear

*~
*\#
.history
.idea


================================================
FILE: .travis.yml
================================================
sudo: false
language: scala
script:
    - sbt +test

================================================
FILE: LICENSE.md
================================================
The MIT License
===============

Copyright (c) 2009 Anton Grigoryev

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

================================================
FILE: README.md
================================================
# Conjecture [![Build Status](https://travis-ci.org/etsy/Conjecture.svg?branch=master)](https://travis-ci.org/etsy/Conjecture)

Conjecture is a framework for building machine learning models in Hadoop using the Scalding DSL.
The goal of this project is to enable the development of statistical models as viable components
in a wide range of product settings. Applications include classification and categorization,
recommender systems, ranking, filtering, and regression (predicting real-valued numbers).
Conjecture has been designed with a primary emphasis on flexibility and can handle a wide variety of inputs.
Integration with Hadoop and Scalding enables seamless handling of extremely large data volumes,
and integration with established ETL processes. Predicted labels can either be consumed directly
by the web stack using the dataset loader, or models can be deployed and consumed by live web code.
Currently, binary classification (assigning one of two possible labels to input data points)
is the most mature component of the Conjecture package.

# Tutorial
There are a few stages involved in training a machine learning model using Conjecture.

## Create Training Data
We represent the training data as "feature vectors" which are just mappings of feature names to real values.
In this case we represent them as a java map of strings to doubles
(although we have a class StringKeyedVector which provides convenience methods for feature vector construction).
We also need the true label of each instance, which we represent as 0 and 1
(the mapping of these binary labels to e.g., "male" and "female" is up to the user).
We construct BinaryLabeledInstances, which are just wrappers for a feature vector and a label.

    val bl = new BinaryLabeledInstance(0.0)
    bl.addTerm("bias", 1.0)
    bl.addTerm("some_feature", 0.5)

## Training a Classifier
Classifiers are essentially trained by presenting the labeled instances to them.  There are several kinds 
of linear classifiers we implement, among them:

* Logistic regression,
* Perceptron,
* MIRA (a large margin perceptron model),
* Passive aggressive.

These models all have several options, such as learning rate, regularization parameters and so on.  We supply
reasonable defaults for these parameters although they can be changed readily.  To train a linear model
simply call the update function with the labeled instance:

    val p = new LogisticRegression()
    p.update(bl)

To make this procedure tractable for large datasets, we provide Scalding wrappers for the training.
These operate by training several small models on mappers, then aggregating them into a final complete model
on the reducers.  This wrapper is called like so:

    new BinaryModelTrainer(args)
      .train(instances, 'instance, 'model)
      .write(SequenceFile("model"))
      .map('model -> 'model){ x : UpdateableBinaryModel => new com.google.gson.Gson().toJson(x) }
      .write(Tsv("model_json"))

This code segment will train a model using a pipe called instances which has a field called instance which contains
the BinaryLabeledInstance objects.  It produces a pipe with a single field containing the completed model, which can
then be written to disk.

This class uses the command line args object from scalding, in order to let you set some options on the command line.
Some useful options are:

| Argument                            | Possible values                               | Default            | Meaning                                          |
|-------------------------------------|-----------------------------------------------|--------------------|--------------------------------------------------|
| --model                             | mira, logistic_regression, passive_aggressive | passive_aggressive | The type of model to use.                        |
| --iters                             | 1, 2, 3...                                    | 1                  | The number of iterations of training to perform. |
| --zero_class_prob, --one_class_prob | [0, 1]                                        | 1                  |                                                  |

To see all the command line options, see the BinaryModelTrainer class.

## Evaluating a Classifier
It is important to get a sense of the performance you can expect out of your classifier on unseen data.
To do this, we recommend using cross validation.
In essence, your input set of instances is split up into testing and training portions (multiple different ways),
then a classifier is trained on each training portion, and evaluated (against the true labels which are present)
using the testing portion.
This is all wrapped up in a class called BinaryCrossValidator; it is used like so:

    new BinaryCrossValidator(args, 5)
      .crossValidate(instances, 'instance)
      .write(Tsv("model_xval"))

This class also takes the command line arguments, which it passes to a model trainer for each fold.
This allows the specification of options to the cross validated models on the command line.
The output contains statistics about the performance of the model as well as the confusion matrices
for each fold.

A script is included which cross validates a logistic regression model on the iris dataset.





================================================
FILE: bin/demo.sh
================================================
#!/bin/bash
#
# End-to-end demo: builds the assembly jar, converts data/iris.tsv into
# multiclass labeled instances, then trains and cross-validates a classifier.
# Run from the repository root.

# Stop at the first failing step. Without this, a failed build falls through
# to the java invocations below, which then fail with a misleading
# classpath / class-not-found error.
set -euo pipefail

# - make monolithic conjecture jar.
sbt clean assembly
# - make the instances.
java -cp target/conjecture-assembly-*.jar com.twitter.scalding.Tool com.etsy.conjecture.demo.IrisDataToMulticlassLabeledInstances --input_file data/iris.tsv --output_file iris_model/instances --local
# - construct the classifier.
java -cp target/conjecture-assembly-*.jar com.twitter.scalding.Tool com.etsy.conjecture.demo.LearnMulticlassClassifier --input iris_model/instances --output iris_model --class_names Iris-versicolor,Iris-virginica,Iris-setosa --iters 5 --folds 3 --local


================================================
FILE: bin/model_diff.py
================================================
import json
import sys
import math

def diff_models(a, b, threshold=0.01):
  """Compare two feature-weight mappings.

  Args:
    a: dict mapping feature name -> weight for the first model.
    b: dict mapping feature name -> weight for the second model.
    threshold: differences whose absolute value is not strictly greater
      than this are dropped.

  Returns:
    A list of (feature, a_weight - b_weight, a_weight_or_None,
    b_weight_or_None) tuples, ordered by decreasing absolute difference.
    A feature missing from one model counts as 0.0 for the difference but
    is reported as None in the per-model columns.
  """
  features = set(a.keys()) | set(b.keys())
  diff = []
  for f in features:
    dv = a.get(f, 0.0) - b.get(f, 0.0)
    if math.fabs(dv) > threshold:
      diff.append((f, dv, a.get(f), b.get(f)))
  diff.sort(key=lambda tup: -math.fabs(tup[1]))
  return diff


def _load_vector(path):
  """Read the ['param']['vector'] mapping from a JSON model file."""
  # `with` closes the handle; the original left both files open.
  with open(path) as fh:
    return json.load(fh)['param']['vector']


if __name__ == '__main__':
  if len(sys.argv) != 3:
    sys.exit("Usage: python " +  sys.argv[0] + " [model file] [model file]")
  a = _load_vector(sys.argv[1])
  b = _load_vector(sys.argv[2])
  for t in diff_models(a, b):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(t[0] + "\t" + str(t[2]) + "\t" + str(t[3]) + "\t(" + str(t[1]) + ")")



================================================
FILE: bin/model_param.py
================================================
import json
import sys
import math

def sorted_params(vector):
  """Return the (feature, weight) pairs of `vector` by decreasing |weight|.

  Args:
    vector: dict mapping feature name -> numeric weight.

  Returns:
    A new list of (feature, weight) tuples; the input is not modified.
  """
  # sorted() works on Python 3's items() view, whereas the original
  # `.items().sort(...)` only works on Python 2 lists.
  return sorted(vector.items(), key=lambda tup: -math.fabs(tup[1]))


if __name__ == '__main__':
  if len(sys.argv) != 2:
    sys.exit("Usage: python " +  sys.argv[0] + " [model file]")
  # `with` closes the handle; the original left the file open.
  with open(sys.argv[1]) as fh:
    vec = sorted_params(json.load(fh)['param']['vector'])
  for v in vec:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(v[0] + "\t" + str(v[1]))


================================================
FILE: bin/prediction_inspection.py
================================================
import json
import sys
from optparse import OptionParser
from math import floor

# Background colours from red (largest error) to green (smallest error).
colors = ["FF0000", "FF1000", "FF2000", "FF3000", "FF4000", "FF5000", "FF6000",
          "FF7000", "FF8000", "FF9000", "FFA000", "FFB000", "FFC000", "FFD000",
          "FFE000", "FFF000", "FFFF00", "F0FF00", "E0FF00", "D0FF00", "C0FF00",
          "B0FF00", "A0FF00", "90FF00", "80FF00", "70FF00", "60FF00", "50FF00",
          "40FF00", "30FF00", "20FF00", "10FF00"]
bins = len(colors)


def error_bin(label, pred):
    """Map a prediction error onto an index into `colors`.

    Args:
        label: the true label (numeric).
        pred: the predicted value (numeric).

    Returns:
        An int in [0, bins - 1]; 0 is the reddest colour (error >= 1.0) and
        bins - 1 the greenest (error close to 0).
    """
    error = min(1.0, abs(pred - label))
    # An error of exactly 1.0 would give index -1, which Python wraps around
    # to the *last* (greenest) colour. Clamp so the worst error stays red.
    return max(0, bins - int(floor(error * bins)) - 1)


parser = OptionParser(usage="""builds a simple web page providing introspection on predictions made by conjecture models.
Depends on the supporting data provided in the instance itself, currently only supporting binary
classification problems
Usage: %prog [options]
""")

parser.add_option('-o', '--out', dest='out', default=False, action='store',
                  help="[optional] destination of the generated html. Defaults to standard out")
parser.add_option('-f', '--file', dest='file', default=False, action='store',
                  help="[optional] file storing input predictions and instances. Defaults to standard in")
parser.add_option('-l', '--label', dest='label', default=False, action='store',
                  help="[optional] only keep examples with this label")
parser.add_option('-L', '--limit', dest='limit', default=1000, action='store',
                  help="maximum number of prediction examples to display. Default: 1000")


def main():
    """Read tab-separated prediction lines and write one coloured HTML div each.

    Each input line is split on tabs: column 0 is a JSON instance holding
    ['label']['value'] and a JSON-encoded 'supporting_data' string, and
    column 2 is the prediction. Examples whose rendered supporting data is
    10000 characters or longer are skipped.
    """
    (options, args) = parser.parse_args()

    output = open(options.out, 'w') if options.out else sys.stdout
    source = open(options.file, 'r') if options.file else sys.stdin

    limit = int(options.limit)

    try:
        output.write("<html>")
        ct = 0

        for line in source:
            parts = line.strip().split("\t")
            content = json.loads(parts[0])
            label = int(content['label']['value'])
            pred = float(parts[2])

            if options.label and str(label) != options.label:
                continue

            color = "#" + colors[error_bin(label, pred)]
            out = ""

            support = json.loads(content['supporting_data'])

            # NOTE(review): supporting data is written into the page without
            # HTML escaping; confirm the input is trusted / already HTML.
            for key in support.keys():
                # "<br/>" replaces the original "</br>", which is not a valid tag.
                out = out + "<b>" + key + "</b><br/>" + support[key] + "<br/>"

            if len(out) < 10000 and ct < limit:
                try:
                    output.write("<div style='background-color: "  + color + "; width: 700px;'>")
                    output.write("%d (%f)<br/>" % (label, pred))
                    output.write(out)
                    output.write("</div><p>")
                    ct = ct + 1
                except (IOError, UnicodeError) as e:
                    # Still best-effort (one bad example must not abort the
                    # page), but report it instead of swallowing everything.
                    sys.stderr.write("skipping example: %s\n" % e)

            if ct >= limit:
                break

        output.write("</html>")
        output.flush()
    finally:
        output.close()
        if source is not sys.stdin:
            source.close()


if __name__ == '__main__':
    main()


================================================
FILE: build.sbt
================================================
import sbt._

// Coordinates under which the artifact is built and published.
name := "conjecture"

version := "0.3.1-SNAPSHOT"

organization := "com.etsy"

// Default Scala version, plus the full set built by `+`-prefixed tasks
// (the Travis build runs `sbt +test`).
scalaVersion := "2.11.11"
crossScalaVersions := Seq("2.11.11", "2.12.4")

scalacOptions ++= Seq("-unchecked", "-deprecation")

//Because some of our (legal!) java code confuses scaladoc, we must skip it for 2.12
//See: https://github.com/scala/bug/issues/10723
// NOTE(review): on any binary version other than 2.12 this appends an
// empty-string option; confirm the doc task ignores it.
scalacOptions in (Compile, doc) += {if(scalaBinaryVersion.value == "2.12") "-no-java-comments" else ""}

// Java sources are compiled with lint output disabled, at source/target level 1.7.
javacOptions ++= Seq("-Xlint:none", "-source", "1.7", "-target", "1.7")

// Java sources are compiled before the Scala sources.
compileOrder := CompileOrder.JavaThenScala

// Additional artifact repository.
// NOTE(review): the URL is plain http; presumably this is where the
// `cascading` artifacts below are resolved from — confirm.
resolvers ++= {
  Seq(
    "Concurrent Maven Repo" at "http://conjars.org/repo"
  )
}

// Library dependencies. Guava is pinned explicitly to 13.0.1 and excluded
// from cascading-local and the Hadoop artifacts; scala-library is excluded
// from the cross-built (%%) Twitter and spray dependencies.
libraryDependencies ++= Seq(
  "cascading" % "cascading-core" % "2.6.1",
  "cascading" % "cascading-local" % "2.6.1" exclude("com.google.guava", "guava"),
  "cascading" % "cascading-hadoop" % "2.6.1",
  "com.google.code.gson" % "gson" % "2.2.2",
  "com.twitter" %% "algebird-core" % "0.13.0" excludeAll ExclusionRule(organization="org.scala-lang", name="scala-library"),
  "com.twitter" %% "scalding-core" % "0.17.4" excludeAll ExclusionRule(organization="org.scala-lang", name="scala-library"),
  "commons-lang" % "commons-lang" % "2.4",
  "com.joestelmach" % "natty" % "0.7",
  "io.spray" %% "spray-json" % "1.3.2" excludeAll ExclusionRule(organization="org.scala-lang", name="scala-library"),
  "com.google.guava" % "guava" % "13.0.1",
  "org.apache.commons" % "commons-math3" % "3.2",
  "org.apache.hadoop" % "hadoop-common" % "2.5.0" excludeAll(
    ExclusionRule(organization="commons-daemon", name="commons-daemon"),
    ExclusionRule(organization="com.google.guava", name="guava")
    ),
  "org.apache.hadoop" % "hadoop-hdfs" % "2.5.0" excludeAll(
    ExclusionRule(organization="commons-daemon", name="commons-daemon"),
    ExclusionRule(organization="com.google.guava", name="guava")
    ),
  "org.scala-lang" % "scala-reflect" % scalaVersion.value,
  "net.sf.trove4j" % "trove4j" % "3.0.3",
  "com.novocode" % "junit-interface" % "0.10" % "test"
)

// Tests run sequentially, and test artifacts are not published.
parallelExecution in Test := false

publishArtifact in Test := false

xerial.sbt.Sonatype.sonatypeSettings

// Publish target selection: when the JVM system property `release` is set,
// the existing publishTo value is kept; otherwise artifacts go to the
// Archiva repository below, under /snapshots for SNAPSHOT versions and
// /internal for everything else.
// NOTE(review): `publishTo.value` inside the definition of publishTo
// presumably picks up the value from sonatypeSettings above — confirm.
publishTo := {
  if (System.getProperty("release") != null) {
    publishTo.value
  } else {
    val v = version.value
    val archivaURL = "http://ivy.etsycorp.com/repository"
    if (v.trim.endsWith("SNAPSHOT")) {
      Some("publish-snapshots" at (archivaURL + "/snapshots"))
    } else {
      Some("publish-releases"  at (archivaURL + "/internal"))
    }
  }
}

publishMavenStyle := true

// NOTE(review): not defined in this file; presumably supplied by a plugin
// declared in project/plugins.sbt — confirm.
overridePublishBothSettings

// No repositories are written into the generated POM.
pomIncludeRepository := { x => false }

// Extra POM metadata: project URL, license, SCM location and developers.
pomExtra := <url>https://github.com/etsy/Conjecture</url>
  <licenses>
    <license>
      <name>MIT License</name>
      <url>http://opensource.org/licenses/MIT</url>
      <distribution>repo</distribution>
    </license>
  </licenses>
  <scm>
    <url>git@github.com:etsy/Conjecture.git</url>
    <connection>scm:git:git@github.com:etsy/Conjecture.git</connection>
  </scm>
  <developers>
    <developer>
      <id>jattenberg</id>
      <name>Josh Attenberg</name>
      <url>github.com/jattenberg</url>
    </developer>
    <developer>
      <id>rjhall</id>
      <name>Rob Hall</name>
      <url>github.com/rjhall</url>
    </developer>
  </developers>


// NOTE(review): duplicates the pomIncludeRepository setting earlier in this
// file with the same effect; one of the two can likely be removed.
pomIncludeRepository := { _ => false }

// Uncomment if you don't want to run all the tests before building assembly
// test in assembly := {}

// Janino includes a broken signature, and is not needed:
// The filter keeps exactly the classpath entries whose file name is in
// `excludes`; those are the jars left out of the assembly.
// NOTE(review): `<<=` is the older sbt setting-dependency operator; it will
// need rewriting as `:=` with `.value` if the sbt version is upgraded.
assemblyExcludedJars in assembly <<= (fullClasspath in assembly) map { cp =>
  val excludes = Set("jsp-api-2.1-6.1.14.jar", "jsp-2.1-6.1.14.jar",
    "jasper-compiler-5.5.12.jar", "janino-2.5.16.jar")
  cp filter { jar => excludes(jar.data.getName)}
}

// Some of these files have duplicates, let's ignore:
// For duplicate paths: .class, .html and anything containing "servlet" take
// the last copy, project.clj files are concatenated, and every other path
// falls back to the previous strategy (`old`).
assemblyMergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) =>
{
  case s if s.endsWith(".class") => MergeStrategy.last
  case s if s.endsWith("project.clj") => MergeStrategy.concat
  case s if s.endsWith(".html") => MergeStrategy.last
  case s if s.contains("servlet") => MergeStrategy.last
  case x => old(x)
}
}


================================================
FILE: clients/phplib/Conjecture/BinaryClassifier.php
================================================
<?php

/**
 * Binary classifier that scores an instance as the logistic function of the
 * dot product between its parameter vector and the instance vector.
 */
class Conjecture_BinaryClassifier {
    /** Parameter vector; must provide dot(), getParams() and getParam(). */
    private $param = null;

    /**
     * @param object $param_vec the model's parameter (weight) vector
     */
    function __construct($param_vec) {
        $this->param = $param_vec;
    }

    /**
     * Raw score: dot product of the parameter vector with the instance.
     *
     * @param object $instance_vec the instance's feature vector
     * @return mixed whatever the parameter vector's dot() returns
     */
    public function dot($instance_vec) {
        return $this->param->dot($instance_vec);
    }

    /**
     * Logistic function of the raw score.
     *
     * @param object $instance_vec the instance's feature vector
     * @return float value in [0, 1]
     */
    public function predict($instance_vec) {
        $dot = $this->dot($instance_vec);
        // 1 / (1 + e^-x) equals e^x / (1 + e^x) algebraically, but the latter
        // evaluates to INF / INF = NAN once e^x overflows. This form
        // saturates cleanly to 1.0 (large x) and 0.0 (very negative x).
        return 1.0 / (1.0 + exp(-$dot));
    }

    /**
     * @return mixed the parameter vector's getParams() result
     */
    public function getParams() {
        return $this->param->getParams();
    }

    /**
     * Lists the features shared by the model and the instance that have the
     * largest absolute model weight.
     *
     * @param object $instance_vec the instance's feature vector
     * @param int    $n            maximum number of features to list
     * @return string comma-separated "feature(weight)" entries, weight
     *                rounded to 2 decimals, strongest first
     */
    public function explain($instance_vec, $n = 10) {
        $keys = array_intersect_key($this->param->getParams(), $instance_vec->getParams());
        $keys = array_map('abs', $keys);
        arsort($keys);
        // preserve_keys = true: without it array_slice renumbers integer-like
        // feature names ("0", "17", ...), so the weight lookup below would
        // read the wrong feature. array_slice already caps the length at the
        // array size, so no explicit count() comparison is needed.
        $res = array_slice($keys, 0, $n, true);
        foreach ($res as $k => $v) {
            $res[$k] = "$k(" . round($this->param->getParam($k), 2) . ")";
        }
        return implode(", ", $res);
    }
}


================================================
FILE: clients/phplib/Conjecture/Config.php
================================================
<?php

/**
 * Settings that Conjecture_Finder reads when locating and loading models.
 */
interface Conjecture_Config {

    /**
     * When this returns a truthy value, Conjecture_Finder returns a built-in
     * dummy model instead of reading a model file.
     */
    public function useDummyConjectureModel();

    /**
     * Directory path that Conjecture_Finder prepends (followed by "/") to a
     * model file name.
     */
    public function getConjectureModelPath();

    /**
     * Upper bound compared against filesize() of a model file; larger files
     * are rejected by Conjecture_Finder.
     */
    public function getMaxFileSize();
}

================================================
FILE: clients/phplib/Conjecture/ConjectureException.php
================================================
<?php

/**
 * Exception type thrown by the Conjecture PHP client, e.g. by
 * Conjecture_Finder when a model file is too large or cannot be read.
 */
class Conjecture_ConjectureException extends Exception{}

================================================
FILE: clients/phplib/Conjecture/Finder.php
================================================
<?php

/**
 * Locates JSON model files and builds the matching classifier objects.
 */
class Conjecture_Finder {

    /** Conjecture_Config supplying the model directory, size limit and dummy-model switch. */
    private $config = null;

    public function __construct(Conjecture_Config $config) {
        $this->config = $config;
    }


    /**
     * Loads a model local to a user's vm.
     *
     * @param string $local_file_path full path of the JSON model file
     * @return Conjecture_BinaryClassifier built from the model's param->vector
     * @throws Conjecture_ConjectureException if the file is too big or unreadable
     */
    public function getLocalModel($local_file_path) {
        $model = json_decode($this->parseFile($local_file_path));
        $cv = new Conjecture_Vector($model->param->vector);
        return new Conjecture_BinaryClassifier($cv);
    }

    /**
     * Reads the model json at a given filepath and returns it as a string
     * with backslashes stripped.
     *
     * @param string $fp path of the model file
     * @return string file contents after stripslashes()
     * @throws Conjecture_ConjectureException if the file exceeds the
     *         configured maximum size, or is missing, unreadable or empty
     */
    private function parseFile($fp) {
        $size = filesize($fp);
        if ($size > $this->config->getMaxFileSize()) {
            throw new Conjecture_ConjectureException("model too big: " . $fp . " is " . $size . "bytes");
        }

        $res = file_get_contents($fp);
        // An empty file is rejected like a missing one, as before.
        if ($res === false || $res === "") {
            throw new Conjecture_ConjectureException("model file not found: $fp");
        }

        // NOTE(review): stripslashes() also removes backslashes that are
        // legitimate JSON escapes; presumably this compensates for how the
        // model files are written — confirm before changing.
        return stripslashes($res);
    }

    /**
     * Returns the raw JSON for a named model, or the dummy model JSON when
     * the config asks for it.
     *
     * @param string $file_name model file name, relative to the configured model path
     * @return string JSON text
     */
    private function getLatestModelJsonForProblem($file_name) {
        if ($this->config->useDummyConjectureModel()) {
            return self::getDummyModel();
        }

        $fp = $this->config->getConjectureModelPath() . "/" . $file_name;
        return $this->parseFile($fp);
    }

    /**
     * @param string $file_name model file name, relative to the configured model path
     * @return mixed the json_decode() result for the model (null if the JSON is invalid)
     */
    public function getLatestModelForProblem($file_name) {
        $json = $this->getLatestModelJsonForProblem($file_name);
        return json_decode($json);
    }

    /**
     * @param string $file_name model file name, relative to the configured model path
     * @return Conjecture_Vector built from the model's param->vector
     */
    public function getLatestBinaryClassificationVectorForProblem($file_name) {
        $model = $this->getLatestModelForProblem($file_name);
        return new Conjecture_Vector($model->param->vector);
    }

    /**
     * @param string $file_name model file name, relative to the configured model path
     * @return Conjecture_BinaryClassifier
     */
    public function getLatestBinaryClassifierForProblem($file_name) {
        return new Conjecture_BinaryClassifier($this->getLatestBinaryClassificationVectorForProblem($file_name));
    }

    /**
     * Builds a one-vs-all classifier from a model whose top-level entries
     * map a category name to that category's parameters.
     *
     * @param string $file_name model file name, relative to the configured model path
     * @return Conjecture_MulticlassOneVsAllClassifier
     */
    public function getOneVsAllClassifier($file_name) {
        $model_array = $this->getLatestModelForProblem($file_name);
        // Initialise up front: with an empty model the loop never runs and
        // the variable would otherwise be undefined at the return below.
        $category_params = [];

        foreach ($model_array as $cat => $params) {
            $category_params[$cat] = new Conjecture_BinaryClassifier(new Conjecture_Vector($params));
        }

        return new Conjecture_MulticlassOneVsAllClassifier($category_params);
    }


    /**
     * Builds a multiclass classifier from a model with a top-level
     * `modelType` and a `param` map of category => { vector }.
     *
     * NOTE(review): the dummy model produced by getDummyModel() does not
     * have this shape (its modelType sits inside `param`, and the entries of
     * `param` are not per-category objects), so this method does not work
     * while the dummy model is enabled.
     *
     * @param string $file_name model file name, relative to the configured model path
     * @return Conjecture_MulticlassClassifier a
     *         Conjecture_MulticlassLogisticRegressionClassifier when
     *         modelType is "multiclass_logistic_regression"
     */
    public function getMulticlassClassifier($file_name) {
        $model_array = $this->getLatestModelForProblem($file_name);
        $model_type = $model_array->modelType;
        $category_params = [];

        foreach ($model_array->param as $cat => $category_model) {
            $category_params[$cat] = new Conjecture_Vector($category_model->vector);
        }

        switch ($model_type) {
            case "multiclass_logistic_regression":
                return new Conjecture_MulticlassLogisticRegressionClassifier($category_params);
            default:
                return new Conjecture_MulticlassClassifier($category_params);
        }
    }


    /**
     * Static factory equivalent to `new Conjecture_Finder($config)`.
     */
    static function build(Conjecture_Config $config) {
        return new Conjecture_Finder($config);
    }

    /**
     * Creates and returns a JSON dummy model with no vectors
     * used for development settings where "real" JSON models
     * may not be present
     */
    private static function getDummyModel() {

        $dummy_model = array("param" => array(
            "vector" => array(),
            "modelType" => "dummy",
            "regularizationWeights" => array(),
            "epoch" => 1,
            "period" => 1,
            "truncationUpdate" => 0,
            "truncationThreshold" => 0,
            "initialLearningRate" => .1,
            "useExponentialLearningRate" => false,
            "exponentialLearningRate" => 1.0,
            "examplesPerEpoch" => 1,
        ));
        return json_encode($dummy_model);
    }
}


================================================
FILE: clients/phplib/Conjecture/Instance.php
================================================
<?php

  /**
   * container class representing instances that are considered
   * as input to predictive models in Conjecture. Has a rich set
   * of adders and setters that mirrors the API of the java code,
   * https://github.etsycorp.com/Engineering/Conjecture
   */
class Conjecture_Instance extends Conjecture_Vector{

    /** Separator placed between a namespace and a term when building a feature key. */
    private static $NAMESPACE_SEP = "___";

    private $id = null;
    private $label = null;


    /**
     * @param array $vector initial feature => value map, handed to the parent constructor
     */
    public function __construct(array $vector = array()) {
        parent::__construct($vector);
    }

    public function getId() {
        return $this->id;
    }

    /**
     * @return $this for chaining
     */
    public function setId($id) {
        $this->id = $id;
        return $this;
    }

    /**
     * Sets a feature to the given value, replacing any existing value.
     */
    public function put($key, $value = 1.0) {
        $this->vector[$key] = $value;
    }

    /**
     * Adds $value to a feature, creating the feature if it does not exist.
     *
     * @return $this for chaining
     */
    public function update($key, $value = 1.0) {
        if (array_key_exists($key, $this->vector)) {
            $this->vector[$key] = $this->vector[$key] + $value;
        } else {
            $this->vector[$key] = $value;
        }
        return $this;
    }

    //some methods to mirror java maps that this class mirrors

    /**
     * put()s every key => value pair of $vector, overwriting existing features.
     */
    public function putAll(array $vector) {
        foreach ($vector as $key => $value) {
            $this->put($key, $value);
        }
    }

    public function containsKey($key) {
        return array_key_exists($key, $this->vector);
    }

    /**
     * True if some feature holds this value. Despite its name, $key is the
     * value searched for; in_array() compares loosely (==).
     */
    public function containsValue($key) {
        return in_array($key, $this->vector);
    }


    public function keySet() {
        return array_keys($this->vector);
    }

    public function values() {
        return array_values($this->vector);
    }

    public function size() {
        return count($this->vector);
    }

    public function isEmpty() {
        return empty($this->vector);
    }

    public function remove($key) {
        unset($this->vector[$key]);
    }

    /**
     * @return string the feature map encoded as JSON
     */
    public function toString() {
        return json_encode($this->vector);
    }

    /**
     * Adds $featureWeight to the feature named by $term, prefixed with
     * "$namespace___" when a namespace is given.
     *
     * @return $this for chaining
     */
    public function addTerm($term, $featureWeight = 1.0, $namespace = "") {
        $key = $namespace == "" ? $term : $namespace . self::$NAMESPACE_SEP . $term;
        $this->update($key, $featureWeight);
        return $this;
    }

    /**
     * Calls addTerm() for every element of $terms with the same weight and
     * namespace.
     *
     * @return $this for chaining
     */
    public function addTerms(array $terms, $featureWeight = 1.0, $namespace = "") {
        foreach ($terms as $term) {
            $this->addTerm($term, $featureWeight, $namespace);
        }
        return $this;
    }

    /**
     * Adds each element of $numberValues as a feature named after its array
     * index, with the element as the weight.
     *
     * @param array  $numberValues numeric values; for an ordinary list the
     *                             feature names are "0", "1", ...
     * @param string $namespace    optional namespace prefix
     * @return $this for chaining
     */
    public function addNumericArray(array $numberValues, $namespace = "") {
        // Iterate the array's own entries instead of 0..count-1: with gaps
        // in the indices (e.g. after array_filter) the counting loop read
        // undefined offsets, stored null weights and skipped later elements.
        foreach ($numberValues as $i => $value) {
            $this->addTerm((string)$i, $value, $namespace);
        }
        return $this;
    }

}


================================================
FILE: clients/phplib/Conjecture/MulticlassClassifier.php
================================================
<?php


/**
 * Multiclass classifier holding one weight vector per category. A
 * prediction is the dot product of each category's vector with the
 * instance, divided by the sum of all categories' dot products.
 */
class Conjecture_MulticlassClassifier {

    private $param = null;

    /**
     * @param array $param map of category => model; each model is a
     *                     Conjecture_Vector
     */
    function __construct($param) {
        $this->param = $param;
    }

    /**
     * Scores the instance against every category and normalises the
     * scores by their sum.
     *
     * NOTE(review): there is no guard for a zero sum of scores; confirm
     * whether callers can produce one.
     *
     * @param object $instance_vec vector accepted by the models' dot()
     * @return array map of category => normalised score
     */
    public function predict($instance_vec) {
        $category_results = [];
        $total = 0;

        foreach ($this->param as $category => $classifier) {
            $prediction = $classifier->dot($instance_vec);
            $category_results[$category] = $prediction;
            $total += $prediction;
        }

        return array_map( function($prob) use ($total) {
                return $prob / $total;
        }, $category_results);
    }

    /**
     * @return array the category => model map given to the constructor
     */
    public function getParams() {
        return $this->param;
    }

    /**
     * Describes, for each category, the strongest model weights among the
     * features present in the instance.
     *
     * @param object $instance_vec object exposing getParams()
     * @param int    $n            maximum number of features per category
     * @return string the per-category descriptions joined with ", "
     */
    public function explain($instance_vec, $n = 10) {
        $explains = [];

        foreach ($this->param as $category => $category_model) {
            $explains[$category] = $this->categoryExplain($instance_vec, $category_model, $n);
        }

        return implode(", ", $explains);
    }


    /**
     * Builds "feature(weight)" entries for the (at most) $n features shared
     * by the model and the instance that have the largest absolute weight.
     *
     * @param object $instance_vec   object exposing getParams()
     * @param object $category_model object exposing getParams()
     * @param int    $n              maximum number of features to list
     * @return string entries joined with ", ", strongest first
     */
    private function categoryExplain($instance_vec, $category_model, $n = 10) {
        // Model weights restricted to the features the instance contains.
        $shared = array_intersect_key($category_model->getParams(), $instance_vec->getParams());

        // Rank by magnitude; array_map with one array and arsort keep keys.
        $magnitudes = array_map('abs', $shared);
        arsort($magnitudes);

        // preserve_keys keeps integer feature keys from being renumbered.
        $top = array_slice($magnitudes, 0, $n, true);

        $parts = [];
        foreach ($top as $feature => $magnitude) {
            // Report the signed weight, not its magnitude.
            $parts[] = "$feature(" . round($shared[$feature], 2) . ")";
        }

        return implode(", ", $parts);
    }
}

================================================
FILE: clients/phplib/Conjecture/MulticlassLogisticRegressionClassifier.php
================================================
<?php


/**
 * Multiclass logistic regression: each category's score is the dot product
 * of its weight vector with the instance, and the scores are turned into
 * probabilities with a softmax.
 */
class Conjecture_MulticlassLogisticRegressionClassifier extends Conjecture_MulticlassClassifier {

    /**
     * Computes softmax probabilities over the categories.
     *
     * The models are read through getParams(): the parent keeps them in a
     * private property that this subclass cannot access directly.
     *
     * @param object $instance_vec vector accepted by the models' dot()
     * @return array map of category => probability; empty when there are
     *               no categories
     */
    public function predict($instance_vec) {
        $scores = [];
        foreach ($this->getParams() as $category => $classifier) {
            $scores[$category] = $classifier->dot($instance_vec);
        }

        if (empty($scores)) {
            return [];
        }

        // Subtracting the largest score leaves the softmax unchanged but
        // keeps exp() from overflowing; the largest term becomes exp(0) = 1,
        // so the total is never zero.
        $maxScore = max($scores);

        $exponentials = [];
        $total = 0.0;
        foreach ($scores as $category => $score) {
            $exponentials[$category] = exp($score - $maxScore);
            $total += $exponentials[$category];
        }

        return array_map(function($value) use ($total) {
            return $value / $total;
        }, $exponentials);
    }

}

================================================
FILE: clients/phplib/Conjecture/MulticlassOneVsAllClassifier.php
================================================
<?php


/**
 * One-vs-all multiclass classifier made of one classifier per category.
 */
class Conjecture_MulticlassOneVsAllClassifier {

    private $param = null;

    /**
     * @param array $param map of category => Conjecture_BinaryClassifier
     *                     representing that category
     */
    function __construct($param) {
        $this->param = $param;
    }

    /**
     * Asks every per-category classifier for a prediction and divides each
     * prediction by the sum of all of them.
     *
     * @param object $instance_vec instance passed to each classifier
     * @return array map of category => normalised prediction
     */
    public function predict($instance_vec) {
        $raw = [];
        $sum = 0;

        foreach ($this->param as $label => $binary) {
            $score = $binary->predict($instance_vec);
            $raw[$label] = $score;
            $sum += $score;
        }

        $normalised = [];
        foreach ($raw as $label => $score) {
            $normalised[$label] = $score / $sum;
        }

        return $normalised;
    }

    /**
     * Collects the parameters of every per-category classifier.
     *
     * @return array map of category => that classifier's getParams() result
     */
    public function getParams() {
        $collected = [];

        foreach ($this->param as $label => $binary) {
            $collected[$label] = $binary->getParams();
        }

        return $collected;
    }

    /**
     * Joins the explanations produced by every per-category classifier.
     *
     * @param object $instance_vec instance passed to each classifier
     * @param int    $n            limit passed to each classifier's explain()
     * @return string the explanations joined with ", "
     */
    public function explain($instance_vec, $n = 10) {
        $pieces = [];

        foreach ($this->param as $label => $binary) {
            $pieces[$label] = $binary->explain($instance_vec, $n);
        }

        $joined = implode(", ", $pieces);
        return $joined;
    }

}

================================================
FILE: clients/phplib/Conjecture/Text.php
================================================
<?php

// This is basically an exact replica of com.etsy.conjecture.text.Text
/**
 * Immutable wrapper around a string offering chainable clean-up steps.
 * Every transformation returns a new Conjecture_Text and leaves the
 * receiver untouched.
 */
class Conjecture_Text {

    private $input = null;

    /**
     * Factory equivalent of the constructor.
     *
     * @param string $text
     * @return Conjecture_Text
     */
    static function build($text) {
        return new Conjecture_Text($text);
    }

    /**
     * @param string $text text to wrap
     */
    function __construct($text) {
        $this->input = $text;
    }

    /**
     * @return string the wrapped text
     */
    function toString() {
        return $this->input;
    }

    /**
     * Runs preg_replace() over the wrapped text and wraps the result.
     *
     * @param string $pattern     complete PCRE pattern including delimiters
     * @param string $replacement replacement text
     * @return Conjecture_Text
     */
    private function replacing($pattern, $replacement) {
        return new Conjecture_Text(preg_replace($pattern, $replacement, $this->input));
    }

    /**
     * Replaces every run of digits with $replacement, then merges two
     * replacements separated only by whitespace into one.
     *
     * @param string $replacement placeholder for numbers
     * @return Conjecture_Text
     */
    function replaceNumbers($replacement = "_num_") {
        $text = preg_replace('/[0-9]+/', $replacement, $this->input);
        // Quote the placeholder so regex metacharacters in it match literally.
        $quoted = preg_quote($replacement, '/');
        return new Conjecture_Text(preg_replace('/' . $quoted . '\s+' . $quoted . '/', $replacement, $text));
    }

    /**
     * Replaces "&...;" sequences with $replacement.
     *
     * @param string $replacement
     * @return Conjecture_Text
     */
    function replaceHTMLEscapes($replacement = " ") {
        return $this->replacing('/&[^;]+;/', $replacement);
    }

    /**
     * Replaces every "<...>" span (shortest match) with a single space.
     *
     * @return Conjecture_Text
     */
    function removeHTMLTags() {
        return $this->replacing('/<.*?>/', " ");
    }

    /**
     * Replaces every "<...>" tag with $replacement.
     *
     * @param string $replacement
     * @return Conjecture_Text
     */
    function replaceHTMLTags($replacement = " ") {
        // The replacement argument is honoured here (it used to be ignored).
        return $this->replacing('/<[^>]+>/', $replacement);
    }

    /**
     * Replaces runs of characters other than ASCII letters, digits,
     * periods, whitespace and hyphens.
     *
     * @param string $replacement
     * @return Conjecture_Text
     */
    function replaceNonAlphaNumeric($replacement = " ") {
        return $this->replacing('/[^a-zA-Z0-9\.\s\-]+/', $replacement);
    }

    /**
     * Same as replaceNonAlphaNumeric() but underscores are kept as well.
     *
     * @param string $replacement
     * @return Conjecture_Text
     */
    function replaceNonAlphaNumericUnderscore($replacement = " ") {
        return $this->replacing('/[^a-zA-Z0-9\.\s\-_]+/', $replacement);
    }

    /**
     * Replaces runs of characters other than ASCII letters.
     *
     * @param string $replacement
     * @return Conjecture_Text
     */
    function replaceNonAlpha($replacement = " ") {
        return $this->replacing('/[^a-zA-Z]+/', $replacement);
    }

    /**
     * Shortens runs of two or more hyphens to exactly two.
     *
     * @return Conjecture_Text
     */
    function collapseHyphens() {
        return $this->replacing('/--+/', "--");
    }

    /**
     * Shortens runs of two or more underscores to exactly two.
     *
     * @return Conjecture_Text
     */
    function collapseUnderscores() {
        return $this->replacing('/__+/', "__");
    }

    /**
     * Shortens runs of two or more periods to exactly two.
     *
     * @return Conjecture_Text
     */
    function collapsePeriods() {
        return $this->replacing('/\.\.+/', "..");
    }

    /**
     * Removes one leading and one trailing character when that character
     * is not an ASCII letter or digit.
     *
     * @return Conjecture_Text
     */
    function stripPunctuation() {
        // PCRE patterns need delimiters; without them preg_replace() fails.
        $temp = preg_replace('/^[^A-Za-z0-9]/', "", $this->input);
        return new Conjecture_Text(preg_replace('/[^A-Za-z0-9]$/', "", $temp));
    }

    // compact any white space
    function collapse() {
        return $this->replacing('/\s+/', " ");
    }

    // remove any whitespace from the right of a string
    function rstrip() {
        return $this->replacing('/\s+$/', "");
    }

    // remove any whitespace from the left of a string
    function lstrip() {
        return $this->replacing('/^\s+/', "");
    }

    // remove any leading or trailing whitespace
    function strip() {
        return $this->rstrip()->lstrip();
    }

    // clean up any whitespace
    function wsclean() {
        return $this->strip()->collapse();
    }

    // remove any unprintable non-ASCII characters
    function removeUnprintables() {
        return $this->replacing('/[^\x20-\x7E]/', "");
    }

    /**
     * Collapses whitespace runs to one space, hyphen runs to two hyphens
     * and runs of two or more periods to a single period.
     *
     * @return Conjecture_Text
     */
    function collapseWhitespaceAndPunc() {
        $text = $this->collapse()->collapseHyphens();
        return new Conjecture_Text(preg_replace('/\.\.+/', ".", $text->toString()));
    }

    /**
     * @return Conjecture_Text the text passed through strtolower()
     */
    function toLowerCase() {
        return new Conjecture_Text(strtolower($this->input));
    }

    /**
     * Applies the clean-up steps in this fixed order: remove HTML tags,
     * replace HTML escapes, replace numbers, replace characters outside
     * the alphanumeric/underscore set, collapse hyphens, collapse
     * underscores, clean whitespace.
     *
     * @return Conjecture_Text
     */
    function standardTextFilter() {
        return $this->removeHTMLTags()
                    ->replaceHTMLEscapes()
                    ->replaceNumbers()
                    ->replaceNonAlphaNumericUnderscore()
                    ->collapseHyphens()
                    ->collapseUnderscores()
                    ->wsclean();
    }

    /**
     * Lists every substring of $n consecutive bytes, left to right.
     *
     * @param int $n shingle length
     * @return array shingles; empty when $n < 1 or the text is shorter
     *               than $n
     */
    function toArrayFromShingles($n) {
        $shingles = array();
        if ($n < 1) {
            return $shingles;
        }

        $text = (string)$this->input;
        // Index of the last position where a full shingle still fits.
        $lastStart = strlen($text) - $n;
        for ($i = 0; $i <= $lastStart; $i++) {
            $shingles[] = substr($text, $i, $n);
        }

        return $shingles;
    }

    /**
     * @param int $n shingle length
     * @return Conjecture_TextSequence sequence of the shingles produced by
     *                                 toArrayFromShingles()
     */
    function toSequenceFromShingles($n) {
        return new Conjecture_TextSequence($this->toArrayFromShingles($n));
    }
}


================================================
FILE: clients/phplib/Conjecture/TextSequence.php
================================================
<?php

// This is basically an exact replica of com.etsy.conjecture.text.TextSequence
/**
 * Immutable sequence of string tokens with chainable filters, per-token
 * clean-up steps, simple statistics and n-gram/shingle builders. Every
 * operation returns a new sequence.
 *
 * Filtering keeps the original array keys of the surviving tokens
 * (array_filter semantics).
 */
class Conjecture_TextSequence {

    private $tokens = null;

    /**
     * @param array $tokens tokens making up the sequence
     */
    function __construct(array $tokens) {
        $this->tokens = $tokens;
    }

    /**
     * concatenates two TextSequences into an additional text sequence
     */
    function concat($other) {
        return new Conjecture_TextSequence(array_merge($this->tokens, $other->tokens));
    }

    /**
     * @param string $glue separator placed between tokens
     * @return string all tokens joined with $glue
     */
    function mkString($glue = " ") {
        return implode($glue, $this->tokens);
    }

    /**
     * @return string all tokens joined with a single space
     */
    function toString() {
        return $this->mkString(" ");
    }

    /**
     * @return array the tokens
     */
    function getTokens() {
        return $this->tokens;
    }

    // ---- private building blocks ----

    /** Keeps the tokens for which $predicate returns a truthy value. */
    private function keepTokens($predicate) {
        return new Conjecture_TextSequence(array_filter($this->tokens, $predicate));
    }

    /** Applies $mapper to every token. */
    private function mapTokens($mapper) {
        return new Conjecture_TextSequence(array_map($mapper, $this->tokens));
    }

    /** Keeps tokens that match $pattern ($keep true) or that do not ($keep false). */
    private function keepMatching($pattern, $keep) {
        return $this->keepTokens(function($x) use ($pattern, $keep) {
            return (preg_match($pattern, $x) === 1) === $keep;
        });
    }

    /** Keeps tokens found in $words ($keep true) or not found ($keep false). */
    private function keepListed(array $words, $keep) {
        return $this->keepTokens(function($x) use ($words, $keep) {
            return in_array($x, $words) === $keep;
        });
    }

    /** Runs preg_replace($pattern, $replacement) over every token. */
    private function replaceInTokens($pattern, $replacement) {
        return $this->mapTokens(function($x) use ($pattern, $replacement) {
            return preg_replace($pattern, $replacement, $x);
        });
    }

    /**
     * Shared formula of the *Fraq() methods. An empty sequence yields 0.0
     * instead of dividing by zero.
     *
     * NOTE(review): applying floor() after dividing by $bins reduces the
     * result to whole numbers; confirm against the Java implementation
     * whether that is intended.
     */
    private function binnedFraction($count, $bins) {
        $total = $this->size();
        if ($total == 0) {
            return 0.0;
        }
        return floor(round($bins * $count / $total) / $bins);
    }

    // ---- token filters ----

    /** Drops tokens that are exactly the empty string. */
    function filterBlank() {
        return $this->keepTokens(function($x) {
            return $x !== "";
        });
    }

    /** Drops tokens listed in $stopwordList. */
    function filterStopwords() {
        return $this->keepListed(self::$stopwordList, false);
    }

    /** Keeps only tokens listed in $stopwordList. */
    function stopwords() {
        return $this->keepListed(self::$stopwordList, true);
    }

    /** Drops tokens listed in $badwordList. */
    function filterBadwords() {
        return $this->keepListed(self::$badwordList, false);
    }

    /** Keeps only tokens listed in $badwordList. */
    function badwords() {
        return $this->keepListed(self::$badwordList, true);
    }

    /** Drops tokens consisting solely of A-Z. */
    function filterAllCaps() {
        return $this->keepMatching('/^[A-Z]+$/', false);
    }

    /** Keeps only tokens consisting solely of A-Z. */
    function AllCaps() {
        return $this->keepMatching('/^[A-Z]+$/', true);
    }

    /** Drops tokens made of one A-Z letter followed by non-A-Z characters. */
    function filterCapitalized() {
        return $this->keepMatching('/^[A-Z][^A-Z]+$/', false);
    }

    /** Keeps only tokens made of one A-Z letter followed by non-A-Z characters. */
    function capitalized() {
        return $this->keepMatching('/^[A-Z][^A-Z]+$/', true);
    }

    /** Drops tokens consisting solely of a-z. */
    function filterLowercase() {
        return $this->keepMatching('/^[a-z]+$/', false);
    }

    /** Keeps only tokens consisting solely of a-z. */
    function allLowercase() {
        return $this->keepMatching('/^[a-z]+$/', true);
    }

    // The URL patterns contain "/", so "#" is used as the regex delimiter.

    /** Drops tokens starting with http:// or https://. */
    function filterURLs() {
        return $this->keepMatching('#^https?://.+#', false);
    }

    /** Keeps only tokens starting with http:// or https://. */
    function allURLs() {
        return $this->keepMatching('#^https?://.+#', true);
    }

    /** Drops tokens that look like an etsy listing URL. */
    function filterListings() {
        return $this->keepMatching('#^https?://.+etsy.+/listing/[0-9]+.*#', false);
    }

    /** Keeps only tokens that look like an etsy listing URL. */
    function allListings() {
        return $this->keepMatching('#^https?://.+etsy.+/listing/[0-9]+.*#', true);
    }

    // ---- statistics ----

    /** @return int number of tokens */
    function size() {
        return count($this->tokens);
    }

    /** @return int number of stop-word tokens */
    function stopWordCount() {
        return $this->stopwords()->size();
    }

    function stopWordFraq($bins = 10.0) {
        return $this->binnedFraction($this->stopWordCount(), $bins);
    }

    /** @return int number of bad-word tokens */
    function badWordCount() {
        return $this->badwords()->size();
    }

    function badWordFraq($bins = 10.0) {
        return $this->binnedFraction($this->badWordCount(), $bins);
    }

    /** @return int number of all-caps tokens */
    function capsCount() {
        return $this->allCaps()->size();
    }

    function capFraq($bins = 10.0) {
        return $this->binnedFraction($this->capsCount(), $bins);
    }

    /** @return int number of URL tokens */
    function urlCount() {
        return $this->allURLs()->size();
    }

    function urlFraq($bins = 10.0) {
        return $this->binnedFraction($this->urlCount(), $bins);
    }

    /** @return int number of listing-URL tokens */
    function listingsCount() {
        return $this->allListings()->size();
    }

    function listingsFraq($bins = 10.0) {
        return $this->binnedFraction($this->listingsCount(), $bins);
    }

    /** @return float floor of the natural logarithm of the token count */
    function sizeBin() {
        return floor(log($this->size()));
    }

    // filtering methods (TODO)

    /**
     * Per token: replaces every run of digits with $replacement, then
     * merges two replacements separated only by whitespace into one.
     */
    function replaceNumbers($replacement = "_num_") {
        // Quote the placeholder so regex metacharacters in it match literally.
        $quoted = preg_quote($replacement, '/');
        $adjacent = '/' . $quoted . '\s+' . $quoted . '/';

        return $this->mapTokens(function($x) use ($replacement, $adjacent) {
            $text = preg_replace('/[0-9]+/', $replacement, $x);
            return preg_replace($adjacent, $replacement, $text);
        });
    }

    /** Per token: replaces "&...;" sequences with $replacement. */
    function replaceHTMLEscapes($replacement = " ") {
        return $this->replaceInTokens('/&[^;]+;/', $replacement);
    }

    /** Per token: replaces "<...>" tags with a single space. */
    function removeHTMLTags() {
        return $this->replaceHTMLTags(" ");
    }

    /** Per token: replaces "<...>" tags with $replacement. */
    function replaceHTMLTags($replacement = " ") {
        return $this->replaceInTokens('/<[^>]+>/', $replacement);
    }

    /** Per token: replaces runs of characters other than letters, digits, ".", whitespace and "-". */
    function replaceNonAlphaNumeric($replacement = " ") {
        return $this->replaceInTokens('/[^a-zA-Z0-9\.\s\-]+/', $replacement);
    }

    /** Same as replaceNonAlphaNumeric() but underscores are kept as well. */
    function replaceNonAlphaNumericUnderscore($replacement = " ") {
        return $this->replaceInTokens('/[^a-zA-Z0-9\.\s\-_]+/', $replacement);
    }

    /** Per token: replaces runs of characters other than letters, ".", whitespace, "-" and "_". */
    function replaceNonAlpha($replacement = " ") {
        return $this->replaceInTokens('/[^a-zA-Z\.\s\-_]+/', $replacement);
    }

    /** Per token: shortens runs of two or more hyphens to exactly two. */
    function collapseHyphens() {
        return $this->replaceInTokens('/--+/', "--");
    }

    /** Per token: shortens runs of two or more underscores to exactly two. */
    function collapseUnderscores() {
        return $this->replaceInTokens('/__+/', "__");
    }

    /** Per token: shortens runs of two or more periods to exactly two. */
    function collapsePeriods() {
        return $this->replaceInTokens('/\.\.+/', "..");
    }

    /**
     * Per token: removes one leading and one trailing character when that
     * character is not an ASCII letter or digit.
     */
    function stripPunctuation() {
        return $this->mapTokens(function($x) {
            // PCRE patterns need delimiters; without them preg_replace() fails.
            $temp = preg_replace('/^[^A-Za-z0-9]/', "", $x);
            return preg_replace('/[^A-Za-z0-9]$/', "", $temp);
        });
    }

    // compact any white space
    function collapse() {
        return $this->replaceInTokens('/\s+/', " ");
    }

    // remove any whitespace from the right of each token
    function rstrip() {
        return $this->replaceInTokens('/\s+$/', "");
    }

    // remove any whitespace from the left of each token
    function lstrip() {
        return $this->replaceInTokens('/^\s+/', "");
    }

    // remove any leading or trailing whitespace
    function strip() {
        return $this->rstrip()->lstrip();
    }

    // clean up any whitespace
    function wsclean() {
        return $this->strip()->collapse();
    }

    // remove any unprintable non-ASCII characters
    function removeUnprintables() {
        return $this->replaceInTokens('/[^\x20-\x7E]/', "");
    }

    /**
     * Per token: collapses whitespace runs to one space, hyphen runs to one
     * hyphen and period runs to one period.
     */
    function collapseWhitespaceAndPunc() {
        return $this->mapTokens(function($x) {
            $ws = preg_replace('/\s+/', " ", $x);
            $dh = preg_replace('/[\-]+/', "-", $ws);
            return preg_replace('/[\.]+/', ".", $dh);
        });
    }

    /** Prefixes every token with $namespace (no separator is inserted). */
    function prependNameSpace($namespace) {
        return $this->mapTokens(function($x) use ($namespace) {
            return $namespace . $x;
        });
    }

    /**
     * @return array the tokens
     */
    function toList() {
        return $this->tokens;
    }

    /**
     * Joins the tokens with $whitespace and returns every substring of $n
     * consecutive bytes of the joined string, left to right.
     *
     * @param int    $n          shingle length
     * @param string $whitespace glue placed between tokens before shingling
     * @return Conjecture_TextSequence empty when $n < 1 or the joined
     *                                 string is shorter than $n
     */
    function shingles($n, $whitespace = "_") {
        $str = implode($whitespace, $this->tokens);

        $shingles = array();
        if ($n >= 1) {
            // Index of the last position where a full shingle still fits.
            $lastStart = strlen($str) - $n;
            for ($i = 0; $i <= $lastStart; $i++) {
                $shingles[] = substr($str, $i, $n);
            }
        }

        return new Conjecture_TextSequence($shingles);
    }

    /**
     * Builds every run of $n consecutive tokens, joined with $glue.
     *
     * @param int    $n    number of tokens per gram
     * @param string $glue separator inside each gram
     * @return Conjecture_TextSequence
     */
    function ngrams($n, $glue = " ") {
        $grams = array();
        $limit = count($this->tokens) - $n + 1;
        for ($i = 0; $i < $limit; $i++) {
            $grams[] = implode($glue, array_slice($this->tokens, $i, $n));
        }

        return new Conjecture_TextSequence($grams);
    }

    /** Unigrams followed by bigrams (bigrams joined with $glue). */
    function unigramsAndBigrams($glue = " ") {
      return $this->ngrams(1)->concat($this->ngrams(2, $glue));
    }

    /**
     * @return Conjecture_Instance instance on which addTerm() was called
     *                             once per token
     */
    function toInstance() {
        $instance = new Conjecture_Instance();

        foreach ($this->tokens as $token) {
            $instance->addTerm($token);
        }

        return $instance;
    }

    // NOTE(review): the list jumps from "up" straight to "re","werent";
    // entries in between may be missing - confirm against the Java source.
    static $stopwordList = array(
        "a","as","able","about","above","according","accordingly","across","actually","after","afterwards","again","against","aint","all","allow","allows","almost","alone","along","already","also","although","always","am","among","amongst","amoungst","amount","an","and","another","any","anybody","anyhow","anyone","anything","anyway","anyways","anywhere","apart","appear","appreciate","appropriate","are","arent","around","as","aside","ask","asking","associated","at","available","away","awfully",
        "b","back","be","became","because","become","becomes","becoming","been","before","beforehand","behind","being","believe","below","beside","besides","best","better","between","beyond","bill","both","bottom","brief","but","by",
        "c","cmon","cs","call","came","can","cant","cannot","cant","cause","causes","certain","certainly","changes","clearly","co","com","come","comes","con","concerning","consequently","consider","considering","contain","containing","contains","corresponding","could","couldnt","couldnt","course","cry","currently",
        "d","de","definitely","describe","described","despite","detail","did","didnt","different","do","does","doesnt","doing","dont","done","down","downwards","due","during",
        "e","each","edu","eg","eight","either","eleven","else","elsewhere","empty","enough","entirely","especially","et","etc","even","ever","every","everybody","everyone","everything","everywhere","ex","exactly","example","except",
        "f","far","few","fifteen","fifth","fify","fill","find","fire","first","five","followed","following","follows","for","former","formerly","forth","forty","found","four","from","front","full","further","furthermore",
        "g","get","gets","getting","give","given","gives","go","goes","going","gone","got","gotten","greetings",
        "h","had","hadnt","happens","hardly","has","hasnt","hasnt","have","havent","having","he","hes","hello","help","hence","her","here","heres","hereafter","hereby","herein","hereupon","hers","herself","hi","him","himself","his","hither","hopefully","how","howbeit","however","hundred",
        "i","id","ill","im","ive","ie","if","ignored","immediate","in","inasmuch","inc","indeed","indicate","indicated","indicates","inner","insofar","instead","interest","into","inward","is","isnt","it","itd","itll","its","its","itself",
        "j","just","k","keep","keeps","kept","know","known","knows",
        "l","last","lately","later","latter","latterly","least","less","lest","let","lets","like","liked","likely","little","look","looking","looks","ltd",
        "m","made","mainly","many","may","maybe","me","mean","meanwhile","merely","might","mill","mine","more","moreover","most","mostly","move","much","must","my","myself",
        "n","name","namely","nd","near","nearly","necessary","need","needs","neither","never","nevertheless","new","next","nine","no","nobody","non","none","noone","nor","normally","not","nothing","novel","now","nowhere",
        "o","obviously","of","off","often","oh","ok","okay","old","on","once","one","ones","only","onto","or","other","others","otherwise","ought","our","ours","ourselves","out","outside","over","overall","own",
        "p","part","particular","particularly","per","perhaps","placed","please","plus","possible","presumably","probably","provides","put",
        "q","que","quite","qv",
        "r","rather","rd","re","really","reasonably","regarding","regardless","regards","relatively","respectively","right",
        "s","said","same","saw","say","saying","says","second","secondly","see","seeing","seem","seemed","seeming","seems","seen","self","selves","sensible","sent","serious","seriously","seven","several","shall","she","should","shouldnt","show","side","since","sincere","six","sixty","so","some","somebody","somehow","someone","something","sometime","sometimes","somewhat","somewhere","soon","sorry","specified","specify","specifying","still","sub","such","sup","sure","system",
        "t","ts","take","taken","tell","ten","tends","th","than","thank","thanks","thanx","that","thats","thats","the","thea","their","theirs","them","themselves","then","thence","there","theres","thereafter","thereby","therefore","therein","theres","thereupon","these","they","theyd","theyll","theyre","theyve","thickv","thin","think","third","this","thorough","thoroughly","those","though","three","through","throughout","thru","thus","to","together","too","took","top","toward","towards","tried","tries","truly","try","trying","twelve","twenty","twice","two",
        "u","un","under","unfortunately","unless","unlikely","until","unto","up","re","werent","what","whats","whatever","when","whence","whenever","where","wheres","whereafter","whereas","whereby","wherein","whereupon","wherever","whether","which","while","whither","who","whos","whoever","whole","whom","whose","why","will","willing","wish","with","within","without","wont","wonder","would","wouldnt",
        "x","y","yes","yet","you","youd","youll","youre","youve","your","yours","yourself","yourselves","z","zero"
    );

    static $badwordList = array("ahole", "arse", "ass", "asshole", "asswipe", "bastard", "batty", "bender", "bitch", "bloody", "bollocks", "boner", "bumboy", "bugger", "coon", "cock", "cocksucker", "cracker", "crap", "cumsucker", "cunt", "damn", "dick", "dildo", "douchebag", "faggot", "fistfucker", "fuck", "fucker", "fuckwit", "fucktwat", "gaylord", "ho", "honky", "jackass", "jism", "joey", "knobcheese", "minge", "minger", "mong", "motherfucker", "munter", "pickle", "piss", "piss", "prick", "pussy", "rimmer", "schmuck", "shit", "slut", "spakka", "spaz", "skank", "taint", "tit", "tool", "tosser", "twat", "whore", "wanker");
}


================================================
FILE: clients/phplib/Conjecture/Vector.php
================================================
<?php 

/**
 * Sparse vector backed by a PHP array that maps a key to a numeric value.
 * Keys that are absent are treated as having the value 0.0.
 */
class Conjecture_Vector {

    protected $vector = null;

    /**
     * @param mixed $array initial contents; cast to an array before storing
     */
    function __construct($array = array()) {
        $this->vector = (array)$array;
    }

    /**
     * Dot product with another vector: sums the products of the values
     * stored under keys that both vectors contain.
     *
     * @param Conjecture_Vector $rhs the other operand
     * @return float
     */
    public function dot($rhs) {
        $sharedKeys = array_keys(array_intersect_key($this->vector, $rhs->vector));
        $sum = 0.0;

        foreach ($sharedKeys as $k) {
            $sum += $this->vector[$k] * $rhs->vector[$k];
        }

        return $sum;
    }

    /**
     * @return array the underlying key => value array
     */
    public function getParams() {
        return $this->vector;
    }

    /**
     * Look up a single entry.
     *
     * @param mixed $k key to look up
     * @return mixed the stored value, or 0.0 when the key is not present
     */
    public function getParam($k) {
        return array_key_exists($k, $this->vector) ? $this->vector[$k] : 0.0;
    }
}


================================================
FILE: data/iris.tsv
================================================
7.0	3.2	4.7	1.4	Iris-versicolor
5.6	3.0	4.1	1.3	Iris-versicolor
5.4	3.4	1.7	0.2	Iris-setosa
5.0	3.0	1.6	0.2	Iris-setosa
6.9	3.2	5.7	2.3	Iris-virginica
4.9	3.0	1.4	0.2	Iris-setosa
5.0	2.3	3.3	1.0	Iris-versicolor
5.2	2.7	3.9	1.4	Iris-versicolor
5.1	3.8	1.9	0.4	Iris-setosa
7.2	3.6	6.1	2.5	Iris-virginica
4.8	3.4	1.6	0.2	Iris-setosa
6.0	2.9	4.5	1.5	Iris-versicolor
5.8	2.6	4.0	1.2	Iris-versicolor
5.7	2.6	3.5	1.0	Iris-versicolor
5.9	3.0	4.2	1.5	Iris-versicolor
5.5	2.3	4.0	1.3	Iris-versicolor
4.6	3.2	1.4	0.2	Iris-setosa
6.3	2.8	5.1	1.5	Iris-virginica
6.3	3.3	6.0	2.5	Iris-virginica
6.9	3.1	4.9	1.5	Iris-versicolor
6.7	3.3	5.7	2.5	Iris-virginica
5.1	3.7	1.5	0.4	Iris-setosa
6.7	3.3	5.7	2.1	Iris-virginica
5.8	2.8	5.1	2.4	Iris-virginica
6.0	3.4	4.5	1.6	Iris-versicolor
5.4	3.0	4.5	1.5	Iris-versicolor
5.5	3.5	1.3	0.2	Iris-setosa
5.0	3.3	1.4	0.2	Iris-setosa
5.7	4.4	1.5	0.4	Iris-setosa
5.3	3.7	1.5	0.2	Iris-setosa
5.2	3.5	1.5	0.2	Iris-setosa
6.5	2.8	4.6	1.5	Iris-versicolor
7.4	2.8	6.1	1.9	Iris-virginica
4.9	3.1	1.5	0.2	Iris-setosa
5.0	3.2	1.2	0.2	Iris-setosa
7.7	2.8	6.7	2.0	Iris-virginica
4.8	3.4	1.9	0.2	Iris-setosa
6.5	3.0	5.2	2.0	Iris-virginica
6.3	2.5	5.0	1.9	Iris-virginica
6.4	3.1	5.5	1.8	Iris-virginica
5.8	2.7	5.1	1.9	Iris-virginica
7.1	3.0	5.9	2.1	Iris-virginica
5.7	2.5	5.0	2.0	Iris-virginica
6.4	2.8	5.6	2.2	Iris-virginica
6.4	3.2	4.5	1.5	Iris-versicolor
6.1	2.6	5.6	1.4	Iris-virginica
4.8	3.0	1.4	0.1	Iris-setosa
5.6	2.8	4.9	2.0	Iris-virginica
6.0	2.2	5.0	1.5	Iris-virginica
5.0	3.5	1.3	0.3	Iris-setosa
5.5	2.6	4.4	1.2	Iris-versicolor
5.0	3.6	1.4	0.2	Iris-setosa
5.0	3.4	1.6	0.4	Iris-setosa
6.3	2.7	4.9	1.8	Iris-virginica
6.7	3.1	4.7	1.5	Iris-versicolor
6.3	2.5	4.9	1.5	Iris-versicolor
4.5	2.3	1.3	0.3	Iris-setosa
6.8	3.2	5.9	2.3	Iris-virginica
7.2	3.2	6.0	1.8	Iris-virginica
5.5	2.4	3.8	1.1	Iris-versicolor
5.8	2.7	5.1	1.9	Iris-virginica
6.1	2.8	4.0	1.3	Iris-versicolor
6.3	2.9	5.6	1.8	Iris-virginica
6.1	2.9	4.7	1.4	Iris-versicolor
6.3	2.3	4.4	1.3	Iris-versicolor
4.6	3.4	1.4	0.3	Iris-setosa
5.5	4.2	1.4	0.2	Iris-setosa
6.5	3.0	5.5	1.8	Iris-virginica
6.7	3.1	4.4	1.4	Iris-versicolor
6.6	2.9	4.6	1.3	Iris-versicolor
5.9	3.0	5.1	1.8	Iris-virginica
6.4	2.7	5.3	1.9	Iris-virginica
5.6	2.5	3.9	1.1	Iris-versicolor
6.4	3.2	5.3	2.3	Iris-virginica
5.7	3.8	1.7	0.3	Iris-setosa
7.2	3.0	5.8	1.6	Iris-virginica
6.7	3.0	5.2	2.3	Iris-virginica
4.6	3.1	1.5	0.2	Iris-setosa
5.6	2.9	3.6	1.3	Iris-versicolor
6.4	2.9	4.3	1.3	Iris-versicolor
5.1	3.5	1.4	0.2	Iris-setosa
7.6	3.0	6.6	2.1	Iris-virginica
5.7	2.8	4.1	1.3	Iris-versicolor
5.6	2.7	4.2	1.3	Iris-versicolor
5.7	2.9	4.2	1.3	Iris-versicolor
5.4	3.7	1.5	0.2	Iris-setosa
6.4	2.8	5.6	2.1	Iris-virginica
4.6	3.6	1.0	0.2	Iris-setosa
4.4	2.9	1.4	0.2	Iris-setosa
4.4	3.2	1.3	0.2	Iris-setosa
6.2	3.4	5.4	2.3	Iris-virginica
6.3	3.4	5.6	2.4	Iris-virginica
6.8	2.8	4.8	1.4	Iris-versicolor
5.1	3.4	1.5	0.2	Iris-setosa
6.1	3.0	4.9	1.8	Iris-virginica
5.7	3.0	4.2	1.2	Iris-versicolor
5.0	3.4	1.5	0.2	Iris-setosa
5.0	3.5	1.6	0.6	Iris-setosa
7.7	3.8	6.7	2.2	Iris-virginica
4.9	3.1	1.5	0.1	Iris-setosa
6.0	2.2	4.0	1.0	Iris-versicolor
6.8	3.0	5.5	2.1	Iris-virginica
5.1	2.5	3.0	1.1	Iris-versicolor
6.5	3.2	5.1	2.0	Iris-virginica
4.7	3.2	1.3	0.2	Iris-setosa
6.6	3.0	4.4	1.4	Iris-versicolor
6.7	3.0	5.0	1.7	Iris-versicolor
4.8	3.0	1.4	0.3	Iris-setosa
5.1	3.8	1.5	0.3	Iris-setosa
7.7	2.6	6.9	2.3	Iris-virginica
5.1	3.8	1.6	0.2	Iris-setosa
5.0	2.0	3.5	1.0	Iris-versicolor
7.7	3.0	6.1	2.3	Iris-virginica
6.5	3.0	5.8	2.2	Iris-virginica
5.8	4.0	1.2	0.2	Iris-setosa
5.4	3.4	1.5	0.4	Iris-setosa
6.2	2.2	4.5	1.5	Iris-versicolor
5.7	2.8	4.5	1.3	Iris-versicolor
5.5	2.5	4.0	1.3	Iris-versicolor
7.3	2.9	6.3	1.8	Iris-virginica
5.6	3.0	4.5	1.5	Iris-versicolor
6.2	2.8	4.8	1.8	Iris-virginica
4.3	3.0	1.1	0.1	Iris-setosa
5.8	2.7	3.9	1.2	Iris-versicolor
7.9	3.8	6.4	2.0	Iris-virginica
6.2	2.9	4.3	1.3	Iris-versicolor
4.9	2.5	4.5	1.7	Iris-virginica
4.9	3.6	1.4	0.1	Iris-setosa
5.2	3.4	1.4	0.2	Iris-setosa
6.0	2.7	5.1	1.6	Iris-versicolor
6.9	3.1	5.4	2.1	Iris-virginica
4.8	3.1	1.6	0.2	Iris-setosa
6.7	3.1	5.6	2.4	Iris-virginica
6.3	3.3	4.7	1.6	Iris-versicolor
5.2	4.1	1.5	0.1	Iris-setosa
5.4	3.9	1.3	0.4	Iris-setosa
4.9	2.4	3.3	1.0	Iris-versicolor
5.5	2.4	3.7	1.0	Iris-versicolor
5.1	3.5	1.4	0.3	Iris-setosa
6.1	3.0	4.6	1.4	Iris-versicolor
5.1	3.3	1.7	0.5	Iris-setosa
4.4	3.0	1.3	0.2	Iris-setosa
5.9	3.2	4.8	1.8	Iris-versicolor
4.7	3.2	1.6	0.2	Iris-setosa
6.9	3.1	5.1	2.3	Iris-virginica
5.4	3.9	1.7	0.4	Iris-setosa
5.8	2.7	4.1	1.0	Iris-versicolor
6.1	2.8	4.7	1.2	Iris-versicolor
6.0	3.0	4.8	1.8	Iris-virginica
6.7	2.5	5.8	1.8	Iris-virginica


================================================
FILE: project/build.properties
================================================
sbt.version=0.13.9


================================================
FILE: project/plugins.sbt
================================================
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.13.0")

addSbtPlugin("no.arktekk.sbt" % "aether-deploy" % "0.14")

addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "0.2.1")

addSbtPlugin("com.typesafe.sbt" % "sbt-pgp" % "0.8.3")

================================================
FILE: sbt
================================================
#!/usr/bin/env bash
#
# A more capable sbt runner, coincidentally also called sbt.
# Author: Paul Phillips <paulp@improving.org>

# todo - make this dynamic
declare -r sbt_release_version="0.13.8"
declare -r sbt_unreleased_version="0.13.9-M1"
declare -r buildProps="project/build.properties"

declare sbt_jar sbt_dir sbt_create sbt_version
declare scala_version sbt_explicit_version
declare verbose noshare batch trace_level log_level
declare sbt_saved_stty debugUs

# Print all arguments to stderr.
echoerr () { echo >&2 "$@"; }
# Print all arguments to stderr (via echoerr) only when $verbose is non-empty.
# When $verbose is empty the [[ test fails, so vlog itself returns non-zero;
# execRunner's `vlog ... && { ... }` depends on that return status.
vlog ()    { [[ -n "$verbose" ]] && echoerr "$@"; }

# Print the value of the sbt.version line found in $buildProps.
# Prints nothing (and returns non-zero) when $buildProps is not readable.
# spaces are possible, e.g. sbt.version = 0.13.0
build_props_sbt () {
  [[ -r "$buildProps" ]] && \
    grep '^sbt\.version' "$buildProps" | tr '=\r' ' ' | awk '{ print $2; }'  # '=' and CR become spaces; field 2 is the version
}

# Make the sbt.version entry in $buildProps equal to $1.
# Does nothing unless $buildProps is readable and the requested version
# differs from the one build_props_sbt currently reports.
update_build_props_sbt () {
  local ver="$1"
  local old="$(build_props_sbt)"

  [[ -r "$buildProps" ]] && [[ "$ver" != "$old" ]] && {
    # rewrite an existing sbt.version line in place
    perl -pi -e "s/^sbt\.version\b.*\$/sbt.version=${ver}/" "$buildProps"
    # if no sbt.version line is present after the substitution, append one
    grep -q '^sbt.version[ =]' "$buildProps" || printf "\nsbt.version=%s\n" "$ver" >> "$buildProps"

    vlog "!!!"
    vlog "!!! Updated file $buildProps setting sbt.version to: $ver"
    vlog "!!! Previous value was: $old"
    vlog "!!!"
  }
}

# Choose and export $sbt_version. Precedence: the explicit version from the
# command line, then the version reported by build_props_sbt, then
# $sbt_release_version.
set_sbt_version () {
  sbt_version="${sbt_explicit_version:-$(build_props_sbt)}"
  if [[ -z "$sbt_version" ]]; then
    sbt_version=$sbt_release_version
  fi
  export sbt_version
}

# restore stty settings (echo in particular)
# Installed as the EXIT trap. Returns immediately when no stty state was
# saved; otherwise hands the saved state back to stty and clears the
# variable so a second invocation does nothing.
onSbtRunnerExit() {
  [[ -n "$sbt_saved_stty" ]] || return
  vlog ""
  vlog "restoring stty: $sbt_saved_stty"
  stty "$sbt_saved_stty"
  unset sbt_saved_stty
}

# save stty and trap exit, to ensure echo is reenabled if we are interrupted.
trap onSbtRunnerExit EXIT
sbt_saved_stty="$(stty -g 2>/dev/null)"
vlog "Saved stty: $sbt_saved_stty"

# this seems to cover the bases on OSX, and someone will
# have to tell me about the others.
#
# Echo the path of the script given as $1, following one level of symlink.
# A non-symlink path is echoed unchanged; an absolute link target is echoed
# as-is; a relative link target is resolved against the link's directory.
get_script_path () {
  local script="$1"

  if [[ ! -L "$script" ]]; then
    echo "$script"
    return
  fi

  local link_target="$(readlink "$script")"
  case "$link_target" in
    /*) echo "$link_target" ;;
     *) echo "${script%/*}/$link_target" ;;
  esac
}

# Print "Aborting: <message>" on stdout and terminate the script with status 1.
die() {
  local -r exit_status=1
  echo "Aborting: $@"
  exit "$exit_status"
}

# Echo the download URL of the sbt launcher jar for the version given as $1.
#   0.7.*            -> fixed googlecode URL of the 0.7.7 launcher
#   0.10.*, 0.11.1/2 -> org.scala-tools.sbt under $sbt_launch_repo
#   anything else    -> org.scala-sbt under $sbt_launch_repo
make_url () {
  # local, so that calling this function does not clobber a global $version
  local version="$1"

  case "$version" in
                0.7.*) echo "http://simple-build-tool.googlecode.com/files/sbt-launch-0.7.7.jar" ;;
    0.10.*|0.11.[12] ) echo "$sbt_launch_repo/org.scala-tools.sbt/sbt-launch/$version/sbt-launch.jar" ;;
                    *) echo "$sbt_launch_repo/org.scala-sbt/sbt-launch/$version/sbt-launch.jar" ;;
  esac
}

# Echo the options file to use.
#   $1  NAME of an environment variable (read through indirection)
#   $2  default options file
# The default file wins when it is readable. Otherwise, if the variable's
# value has the form @<path> and <path> is readable, <path> is echoed.
# In every other case the default file name is echoed unchanged.
init_default_option_file () {
  local env_value="${!1}"
  local chosen="$2"

  if [[ ! -r "$chosen" ]] && [[ "$env_value" =~ ^@(.*)$ ]] && [[ -r "${BASH_REMATCH[1]}" ]]; then
    chosen="${BASH_REMATCH[1]}"
  fi

  echo "$chosen"
}

declare -r cms_opts="-XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC"
declare -r jit_opts="-XX:ReservedCodeCacheSize=256m -XX:+TieredCompilation"
declare -r default_jvm_opts_common="-Xms512m -Xmx1536m -Xss2m $jit_opts $cms_opts"
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r latest_28="2.8.2"
declare -r latest_29="2.9.3"
declare -r latest_210="2.10.5"
declare -r latest_211="2.11.7"

declare -r script_path="$(get_script_path "$BASH_SOURCE")"
declare -r script_name="${script_path##*/}"

# some non-read-onlies set with defaults
declare java_cmd="java"
declare sbt_opts_file="$(init_default_option_file SBT_OPTS .sbtopts)"
declare jvm_opts_file="$(init_default_option_file JVM_OPTS .jvmopts)"
declare sbt_launch_repo="http://repo.typesafe.com/typesafe/ivy-releases"

# pull -J and -D options to give to java.
declare -a residual_args
declare -a java_args
declare -a scalac_args
declare -a sbt_commands

# args to jvm/sbt via files or environment variables
declare -a extra_jvm_opts extra_sbt_opts

# Append $1 to java_args as a single element.
addJava () {
  local jvm_arg="$1"
  vlog "[addJava] arg = '$jvm_arg'"
  java_args+=("$jvm_arg")
}
# Append $1 to sbt_commands as a single element.
addSbt () {
  local sbt_cmd="$1"
  vlog "[addSbt] arg = '$sbt_cmd'"
  sbt_commands+=("$sbt_cmd")
}
# Queue the sbt command `set <key> in ThisBuild := <value>`.
#   $1     setting key
#   $2...  value; several words are joined with single spaces
# "$*" is used rather than "$@": inside a quoted string "$@" expands to
# several words, and addSbt only looks at its first argument, so a value
# passed as more than one word used to be truncated.
setThisBuild () {
  vlog "[addBuild] args = '$*'"
  local key="$1" && shift
  addSbt "set $key in ThisBuild := $*"
}
# Append $1 to scalac_args as a single element.
addScalac () {
  local scalac_arg="$1"
  vlog "[addScalac] arg = '$scalac_arg'"
  scalac_args+=("$scalac_arg")
}
# Append $1 to residual_args as a single element.
addResidual () {
  local leftover="$1"
  vlog "[residual] arg = '$leftover'"
  residual_args+=("$leftover")
}
# Queue an sbt command that adds the resolver expression $1 to `resolvers`.
addResolver () {
  local resolver_expr="$1"
  addSbt "set resolvers += $resolver_expr"
}
# Add the two JVM flags that enable a JDWP debug socket on the address $1.
addDebugger () {
  local address="$1"
  local flag
  for flag in "-Xdebug" "-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$address"; do
    addJava "$flag"
  done
}
# Queue `++ <version>` for sbt. Versions ending in -SNAPSHOT additionally get
# the sonatype snapshots resolver queued first.
setScalaVersion () {
  local scala="$1"
  case "$scala" in
    *-SNAPSHOT) addResolver 'Resolver.sonatypeRepo("snapshots")' ;;
  esac
  addSbt "++ $scala"
}
# Point the runner and sbt at the JDK rooted at $1: sets java_cmd, queues the
# javaHome setting, exports JAVA_HOME and JDK_HOME, and prepends the JDK's
# bin directory to PATH.
setJavaHome () {
  local home="$1"
  java_cmd="$home/bin/java"
  setThisBuild javaHome "Some(file(\"$home\"))"
  export JAVA_HOME="$home" JDK_HOME="$home"
  export PATH="$JAVA_HOME/bin:$PATH"
}
# Same as setJavaHome, but the queued javaHome setting is bracketed by the
# sbt commands `warn` and `info`.
setJavaHomeQuietly () {
  local home="$1"
  addSbt warn
  setJavaHome "$home"
  addSbt info
}

# if set, use JDK_HOME/JAVA_HOME over java found in path
if [[ -e "$JDK_HOME/lib/tools.jar" ]]; then
  setJavaHomeQuietly "$JDK_HOME"
elif [[ -e "$JAVA_HOME/bin/java" ]]; then
  setJavaHomeQuietly "$JAVA_HOME"
fi

# directory to store sbt launchers
declare sbt_launch_dir="$HOME/.sbt/launchers"
[[ -d "$sbt_launch_dir" ]] || mkdir -p "$sbt_launch_dir"
[[ -w "$sbt_launch_dir" ]] || sbt_launch_dir="$(mktemp -d -t sbt_extras_launchers.XXXXXX)"

# Echo the major Java version of "$java_cmd" as a plain integer.
# `java -version` reports "1.N.x" up to Java 8 (major = N) and "N[.x.y]"
# from Java 9 on (major = N). Taking a fixed character position only works
# for the first form: it turned "9.0.1" into "0" and "11.0.2" into ".".
# Echoes an empty line when the version string cannot be parsed.
java_version () {
  local version=$("$java_cmd" -version 2>&1 | grep -E -e '(java|openjdk) version' | awk '{ print $3 }' | tr -d \")
  vlog "Detected Java version: $version"

  if [[ "$version" =~ ^1\.([0-9]+) ]]; then
    echo "${BASH_REMATCH[1]}"
  elif [[ "$version" =~ ^([0-9]+) ]]; then
    echo "${BASH_REMATCH[1]}"
  else
    echo ""
  fi
}

# MaxPermSize critical on pre-8 jvms but incurs noisy warning on 8+
# Echo the default JVM options: $default_jvm_opts_common, preceded by
# -XX:MaxPermSize=384m unless java_version reports 8 or higher.
default_jvm_opts () {
  local major="$(java_version)"
  local permgen_opt=""

  [[ $major -ge 8 ]] || permgen_opt="-XX:MaxPermSize=384m "
  echo "${permgen_opt}$default_jvm_opts_common"
}

# Echo a scala version derived from the build.scala.versions line of
# $buildProps; prints nothing when $buildProps is not readable.
# Note: versionLine and versionString are not declared local, so they are
# assigned in the caller's scope.
build_props_scala () {
  if [[ -r "$buildProps" ]]; then
    versionLine="$(grep '^build.scala.versions' "$buildProps")"
    versionString="${versionLine##build.scala.versions=}"
    # strips the longest suffix that starts with a space followed by a dot
    # NOTE(review): presumably meant to keep only the first of several
    # space-separated versions ("%% *") - confirm before relying on it
    echo "${versionString%% .*}"
  fi
}

# Replace the current shell process with the command line given as "$@".
# The command line is echoed to stderr first, but only when vlog succeeds,
# i.e. when $verbose is set. When $batch is set, stdin is redirected from
# /dev/null before the exec.
execRunner () {
  # print the arguments one to a line, quoting any containing spaces
  vlog "# Executing command line:" && {
    for arg; do
      if [[ -n "$arg" ]]; then
        if printf "%s\n" "$arg" | grep -q ' '; then
          printf >&2 "\"%s\"\n" "$arg"
        else
          printf >&2 "%s\n" "$arg"
        fi
      fi
    done
    vlog ""
  }

  # exec with only a redirection rewires stdin of this shell itself
  [[ -n "$batch" ]] && exec </dev/null
  exec "$@"
}

# Echo the launcher download URL for sbt version $1 (delegates to make_url).
jar_url () {
  local requested="$1"
  make_url "$requested"
}

# Echo the path under $sbt_launch_dir where the launcher jar for sbt
# version $1 is stored.
jar_file () {
  local requested="$1"
  echo "$sbt_launch_dir/$requested/sbt-launch.jar"
}

# Download the sbt launcher.
#   $1  source URL
#   $2  destination jar path (parent directories are created)
# Uses curl when available, otherwise wget. Returns 0 only when the download
# command succeeded and the jar is readable afterwards.
download_url () {
  local url="$1"
  local jar="$2"

  echoerr "Downloading sbt launcher for $sbt_version:"
  echoerr "  From  $url"
  echoerr "    To  $jar"

  mkdir -p "${jar%/*}" || return 1

  # command -v is the portable way to probe for a program; `which` is an
  # external tool that may be missing
  if command -v curl >/dev/null 2>&1; then
    curl --fail --silent --location "$url" --output "$jar"
  elif command -v wget >/dev/null 2>&1; then
    wget --quiet -O "$jar" "$url"
  fi || {
    # a failed transfer can leave an empty or partial file behind; remove it
    # so a later readability check does not mistake it for a valid launcher
    rm -f "$jar"
    return 1
  }

  [[ -r "$jar" ]]
}

# Set the globals sbt_url and sbt_jar for $sbt_version and download the
# launcher when the jar is not readable yet. Returns the status of the
# download, or 0 when no download was needed.
acquire_sbt_jar () {
  sbt_url="$(jar_url "$sbt_version")"
  sbt_jar="$(jar_file "$sbt_version")"

  if [[ ! -r "$sbt_jar" ]]; then
    download_url "$sbt_url" "$sbt_jar"
  fi
}

# Print the help text to stdout. The here-document is unquoted, so the
# variables and the $(...) command substitutions inside it are expanded
# when usage runs (this includes running `java -version` and
# default_jvm_opts).
usage () {
  cat <<EOM
Usage: $script_name [options]

Note that options which are passed along to sbt begin with -- whereas
options to this runner use a single dash. Any sbt command can be scheduled
to run first by prefixing the command with --, so --warn, --error and so on
are not special.

Output filtering: if there is a file in the home directory called .sbtignore
and this is not an interactive sbt session, the file is treated as a list of
bash regular expressions. Output lines which match any regex are not echoed.
One can see exactly which lines would have been suppressed by starting this
runner with the -x option.

  -h | -help         print this message
  -v                 verbose operation (this runner is chattier)
  -d, -w, -q         aliases for --debug, --warn, --error (q means quiet)
  -x                 debug this script
  -trace <level>     display stack traces with a max of <level> frames (default: -1, traces suppressed)
  -debug-inc         enable debugging log for the incremental compiler
  -no-colors         disable ANSI color codes
  -sbt-create        start sbt even if current directory contains no sbt project
  -sbt-dir   <path>  path to global settings/plugins directory (default: ~/.sbt/<version>)
  -sbt-boot  <path>  path to shared boot directory (default: ~/.sbt/boot in 0.11+)
  -ivy       <path>  path to local Ivy repository (default: ~/.ivy2)
  -no-share          use all local caches; no sharing
  -offline           put sbt in offline mode
  -jvm-debug <port>  Turn on JVM debugging, open at the given port.
  -batch             Disable interactive mode
  -prompt <expr>     Set the sbt prompt; in expr, 's' is the State and 'e' is Extracted

  # sbt version (default: sbt.version from $buildProps if present, otherwise $sbt_release_version)
  -sbt-force-latest         force the use of the latest release of sbt: $sbt_release_version
  -sbt-version  <version>   use the specified version of sbt (default: $sbt_release_version)
  -sbt-dev                  use the latest pre-release version of sbt: $sbt_unreleased_version
  -sbt-jar      <path>      use the specified jar as the sbt launcher
  -sbt-launch-dir <path>    directory to hold sbt launchers (default: ~/.sbt/launchers)
  -sbt-launch-repo <url>    repo url for downloading sbt launcher jar (default: $sbt_launch_repo)

  # scala version (default: as chosen by sbt)
  -28                       use $latest_28
  -29                       use $latest_29
  -210                      use $latest_210
  -211                      use $latest_211
  -scala-home <path>        use the scala build at the specified directory
  -scala-version <version>  use the specified version of scala
  -binary-version <version> use the specified scala version when searching for dependencies

  # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
  -java-home <path>         alternate JAVA_HOME

  # passing options to the jvm - note it does NOT use JAVA_OPTS due to pollution
  # The default set is used if JVM_OPTS is unset and no -jvm-opts file is found
  <default>        $(default_jvm_opts)
  JVM_OPTS         environment variable holding either the jvm args directly, or
                   the reference to a file containing jvm args if given path is prepended by '@' (e.g. '@/etc/jvmopts')
                   Note: "@"-file is overridden by local '.jvmopts' or '-jvm-opts' argument.
  -jvm-opts <path> file containing jvm args (if not given, .jvmopts in project root is used if present)
  -Dkey=val        pass -Dkey=val directly to the jvm
  -J-X             pass option -X directly to the jvm (-J is stripped)

  # passing options to sbt, OR to this runner
  SBT_OPTS         environment variable holding either the sbt args directly, or
                   the reference to a file containing sbt args if given path is prepended by '@' (e.g. '@/etc/sbtopts')
                   Note: "@"-file is overridden by local '.sbtopts' or '-sbt-opts' argument.
  -sbt-opts <path> file containing sbt args (if not given, .sbtopts in project root is used if present)
  -S-X             add -X to sbt's scalacOptions (-S is stripped)
EOM
}

# Consume the runner's own options from "$@" and record their effect in the
# script's global state (flags, java_args, sbt_commands, scalac_args, ...).
# Anything that is not recognised is kept, in order, in residual_args.
# Options that take a value are validated with require_arg and consume two
# positional parameters.
process_args ()
{
  # Abort via die unless the value ($3) of option $2 is present; $1 names the
  # expected kind of value for the error message. A value that is empty or
  # begins with "-" is rejected, so option values can never start with a dash.
  require_arg () {
    local type="$1"
    local opt="$2"
    local arg="$3"

    if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then
      die "$opt requires <$type> argument"
    fi
  }
  while [[ $# -gt 0 ]]; do
    case "$1" in
          -h|-help) usage; exit 1 ;;
                -v) verbose=true && shift ;;
                -d) addSbt "--debug" && addSbt debug && shift ;;
                -w) addSbt "--warn"  && addSbt warn  && shift ;;
                -q) addSbt "--error" && addSbt error && shift ;;
                -x) debugUs=true && shift ;;
            -trace) require_arg integer "$1" "$2" && trace_level="$2" && shift 2 ;;
              -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
        -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
         -no-share) noshare=true && shift ;;
         -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
          -sbt-dir) require_arg path "$1" "$2" && sbt_dir="$2" && shift 2 ;;
        -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
          -offline) addSbt "set offline := true" && shift ;;
        -jvm-debug) require_arg port "$1" "$2" && addDebugger "$2" && shift 2 ;;
            -batch) batch=true && shift ;;
           -prompt) require_arg "expr" "$1" "$2" && setThisBuild shellPrompt "(s => { val e = Project.extract(s) ; $2 })" && shift 2 ;;

       -sbt-create) sbt_create=true && shift ;;
          -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
      -sbt-version) require_arg version "$1" "$2" && sbt_explicit_version="$2" && shift 2 ;;
 -sbt-force-latest) sbt_explicit_version="$sbt_release_version" && shift ;;
          -sbt-dev) sbt_explicit_version="$sbt_unreleased_version" && shift ;;
   -sbt-launch-dir) require_arg path "$1" "$2" && sbt_launch_dir="$2" && shift 2 ;;
  -sbt-launch-repo) require_arg path "$1" "$2" && sbt_launch_repo="$2" && shift 2 ;;
    -scala-version) require_arg version "$1" "$2" && setScalaVersion "$2" && shift 2 ;;
   -binary-version) require_arg version "$1" "$2" && setThisBuild scalaBinaryVersion "\"$2\"" && shift 2 ;;
       -scala-home) require_arg path "$1" "$2" && setThisBuild scalaHome "Some(file(\"$2\"))" && shift 2 ;;
        -java-home) require_arg path "$1" "$2" && setJavaHome "$2" && shift 2 ;;
         -sbt-opts) require_arg path "$1" "$2" && sbt_opts_file="$2" && shift 2 ;;
         -jvm-opts) require_arg path "$1" "$2" && jvm_opts_file="$2" && shift 2 ;;

               # -D is passed to the jvm unchanged; -J and -S have their two-character prefix stripped
               -D*) addJava "$1" && shift ;;
               -J*) addJava "${1:2}" && shift ;;
               -S*) addScalac "${1:2}" && shift ;;
               -28) setScalaVersion "$latest_28" && shift ;;
               -29) setScalaVersion "$latest_29" && shift ;;
              -210) setScalaVersion "$latest_210" && shift ;;
              -211) setScalaVersion "$latest_211" && shift ;;

           # these queue the log-level command and are also kept as residual args
           --debug) addSbt debug && addResidual "$1" && shift ;;
            --warn) addSbt warn  && addResidual "$1" && shift ;;
           --error) addSbt error && addResidual "$1" && shift ;;
                 *) addResidual "$1" && shift ;;
    esac
  done
}

# process the direct command line arguments
process_args "$@"

# skip #-styled comments and blank lines
# Echo every remaining line of the file $1, one per line, with leading and
# trailing whitespace removed.
readConfigFile() {
  local line
  # -r keeps backslashes literal (e.g. Windows paths in -D options).
  # `|| [[ -n "$line" ]]` also processes a final line that has no trailing
  # newline: read returns non-zero there but has still filled in $line.
  while read -r line || [[ -n "$line" ]]; do
    [[ $line =~ ^# ]] || [[ -z $line ]] || echo "$line"
  done < "$1"
}

# if there are file/environment sbt_opts, process again so we
# can supply args to this runner
if [[ -r "$sbt_opts_file" ]]; then
  vlog "Using sbt options defined in file $sbt_opts_file"
  while read opt; do extra_sbt_opts+=("$opt"); done < <(readConfigFile "$sbt_opts_file")
elif [[ -n "$SBT_OPTS" && ! ("$SBT_OPTS" =~ ^@.*) ]]; then
  vlog "Using sbt options defined in variable \$SBT_OPTS"
  extra_sbt_opts=( $SBT_OPTS )
else
  vlog "No extra sbt options have been defined"
fi

[[ -n "${extra_sbt_opts[*]}" ]] && process_args "${extra_sbt_opts[@]}"

# reset "$@" to the residual args
set -- "${residual_args[@]}"
argumentCount=$#

# set sbt version
set_sbt_version

# only exists in 0.12+
# Queue the traceLevel setting from $trace_level; for sbt 0.7.x, 0.10.x and
# 0.11.x a message is printed to stderr instead.
setTraceLevel() {
  if [[ "$sbt_version" == 0.7.* || "$sbt_version" == 0.10.* || "$sbt_version" == 0.11.* ]]; then
    echoerr "Cannot set trace level in sbt version $sbt_version"
  else
    setThisBuild traceLevel $trace_level
  fi
}

# set scalacOptions if we were given any -S opts
# Every collected -S option becomes its own quoted element of a Scala Seq.
# Interpolating "${scalac_args[@]}" inside a quoted string expands to one
# word per option, and addSbt only reads its first argument, so more than
# one -S option used to produce a truncated sbt command.
setScalacOptions () {
  [[ ${#scalac_args[@]} -eq 0 ]] && return 0

  local opt quoted=""
  for opt in "${scalac_args[@]}"; do
    quoted+="${quoted:+, }\"$opt\""
  done
  addSbt "set scalacOptions in ThisBuild ++= Seq($quoted)"
}
setScalacOptions

# Update build.properties on disk to set explicit version - sbt gives us no choice
[[ -n "$sbt_explicit_version" ]] && update_build_props_sbt "$sbt_explicit_version"
vlog "Detected sbt version $sbt_version"

[[ -n "$scala_version" ]] && vlog "Overriding scala version to $scala_version"

# no args - alert them there's stuff in here
(( argumentCount > 0 )) || {
  vlog "Starting $script_name: invoke with -help for other options"
  residual_args=( shell )
}

# verify this is an sbt dir or -create was given
[[ -r ./build.sbt || -d ./project || -n "$sbt_create" ]] || {
  cat <<EOM
$(pwd) doesn't appear to be an sbt project.
If you want to start sbt anyway, run:
  $0 -sbt-create

EOM
  exit 1
}

# pick up completion if present; todo
[[ -r .sbt_completion.sh ]] && source .sbt_completion.sh

# no jar? download it.
[[ -r "$sbt_jar" ]] || acquire_sbt_jar || {
  # still no jar? uh-oh.
  echo "Download failed. Obtain the jar manually and place it at $sbt_jar"
  exit 1
}

if [[ -n "$noshare" ]]; then
  for opt in ${noshare_opts}; do
    addJava "$opt"
  done
else
  case "$sbt_version" in
    "0.7."* | "0.10."* | "0.11."* | "0.12."* )
      [[ -n "$sbt_dir" ]] || {
        sbt_dir="$HOME/.sbt/$sbt_version"
        vlog "Using $sbt_dir as sbt dir, -sbt-dir to override."
      }
    ;;
  esac

  if [[ -n "$sbt_dir" ]]; then
    addJava "-Dsbt.global.base=$sbt_dir"
  fi
fi

if [[ -r "$jvm_opts_file" ]]; then
  vlog "Using jvm options defined in file $jvm_opts_file"
  while read opt; do extra_jvm_opts+=("$opt"); done < <(readConfigFile "$jvm_opts_file")
elif [[ -n "$JVM_OPTS" && ! ("$JVM_OPTS" =~ ^@.*) ]]; then
  vlog "Using jvm options defined in \$JVM_OPTS variable"
  extra_jvm_opts=( $JVM_OPTS )
else
  vlog "Using default jvm options"
  extra_jvm_opts=( $(default_jvm_opts) )
fi

# traceLevel is 0.12+
[[ -n "$trace_level" ]] && setTraceLevel

# Assemble the full java command line and hand it to execRunner. Order:
# java binary, extra jvm options, collected java args, `-jar <launcher>`,
# queued sbt commands, residual args.
main () {
  local -a launch_cmd=("$java_cmd")

  launch_cmd+=("${extra_jvm_opts[@]}")
  launch_cmd+=("${java_args[@]}")
  launch_cmd+=(-jar "$sbt_jar")
  launch_cmd+=("${sbt_commands[@]}")
  launch_cmd+=("${residual_args[@]}")

  execRunner "${launch_cmd[@]}"
}

# sbt inserts this string on certain lines when formatting is enabled:
#   val OverwriteLine = "\r\u001BM\u001B[2K"
# ...in order not to spam the console with a million "Resolving" lines.
# Unfortunately that makes it that much harder to work with when
# we're not going to print those lines anyway. We strip that bit of
# line noise, but leave the other codes to preserve color.
# Run main and pass its stdout through a line filter built from the
# non-comment, non-blank lines of ~/.sbtignore (joined with '|' into one
# regular expression). Lines whose colour-stripped text matches are dropped,
# or shown with an "[X] " prefix when $debugUs is set.
mainFiltered () {
  # NOTE(review): ansiOverwrite is assigned but not referenced anywhere in this function
  local ansiOverwrite='\r\x1BM\x1B[2K'
  # NOTE(review): if ~/.sbtignore contains only comments and blank lines this is
  # empty, and an empty regex presumably matches every line - confirm
  local excludeRegex=$(egrep -v '^#|^$' ~/.sbtignore | paste -sd'|' -)

  # Print or suppress a single output line ($1) according to excludeRegex.
  echoLine () {
    local line="$1"
    local line1="$(echo "$line" | sed -r 's/\r\x1BM\x1B\[2K//g')"       # This strips the OverwriteLine code.
    local line2="$(echo "$line1" | sed -r 's/\x1B\[[0-9;]*[JKmsu]//g')" # This strips all codes - we test regexes against this.

    if [[ $line2 =~ $excludeRegex ]]; then
      [[ -n $debugUs ]] && echo "[X] $line1"
    else
      [[ -n $debugUs ]] && echo "    $line1" || echo "$line1"
    fi
  }

  echoLine "Starting sbt with output filtering enabled."
  # main runs on the left-hand side of the pipeline; its output is read line by line
  main | while read -r line; do echoLine "$line"; done
}

# Only filter if there's a filter file and we don't see a known interactive command.
# Obviously this is super ad hoc but I don't know how to improve on it. Testing whether
# stdin is a terminal is useless because most of my use cases for this filtering are
# exactly when I'm at a terminal, running sbt non-interactively.
#
# Succeeds when ~/.sbtignore is a regular file and none of the residual args
# is the word shell, console or consoleProject.
# Uses `grep -E`: `egrep` is obsolescent and recent GNU grep prints a warning
# to stderr each time it is invoked.
shouldFilter () { [[ -f ~/.sbtignore ]] && ! grep -Eq '\b(shell|console|consoleProject)\b' <<<"${residual_args[@]}"; }

# run sbt
if shouldFilter; then mainFiltered; else main; fi


================================================
FILE: src/main/java/com/etsy/conjecture/GenericPair.java
================================================
package com.etsy.conjecture;

/**
 * @author Diane Hu
 */
public class GenericPair<F, S> implements java.io.Serializable {

    private static final long serialVersionUID = 123L;
    public F first;
    public S second;

    /**
     * Class constructor specifying the first and second number to create
     * 
     * @param first
     *            first number
     * @param second
     *            second number
     */

    public GenericPair(F first, S second) {
        this.first = first;
        this.second = second;
    }

    /**
     * The method gets first number
     * 
     * @return first number
     */
    public F getFirst() {
        return first;
    }

    /**
     * The method sets first number
     * 
     * @param fisrt
     *            first number
     */
    public void setFirst(F first) {
        this.first = first;
    }

    /**
     * The method gets second number
     * 
     * @return second number
     */
    public S getSecond() {
        return second;
    }

    /**
     * The method sets second number
     * 
     * @param second
     *            second number
     */
    public void setSecond(S second) {
        this.second = second;
    }

    @Override
    public String toString() {
        return first + "," + second;
    }

    @SuppressWarnings("unchecked")
    public boolean equals(Object o) {
        if (!(o instanceof GenericPair<?, ?>))
            return false;
        GenericPair<F, S> p = (GenericPair<F, S>)o;
        return (p.first).equals(first) && (p.second).equals(second);
    }

    public int hashCode() {
        return 17 + first.hashCode() * 31 + second.hashCode();
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/PrimitivePair.java
================================================
package com.etsy.conjecture;

/**
 * A serializable, mutable pair of two primitive doubles, reachable both
 * through public fields and through bean-style accessors.
 * 
 * @author Josh Attenberg
 */
public class PrimitivePair implements java.io.Serializable {
    private static final long serialVersionUID = 1234L;
    public double first;
    public double second;

    /**
     * Creates a pair from its two values.
     * 
     * @param first
     *            first number
     * @param second
     *            second number
     */
    public PrimitivePair(double first, double second) {
        this.first = first;
        this.second = second;
    }

    /**
     * @return first number
     */
    public double getFirst() {
        return this.first;
    }

    /**
     * @param fisrt
     *            new first number
     */
    public void setFirst(double fisrt) {
        first = fisrt;
    }

    /**
     * @return second number
     */
    public double getSecond() {
        return this.second;
    }

    /**
     * @param second
     *            new second number
     */
    public void setSecond(double second) {
        this.second = second;
    }

    /**
     * @return both numbers separated by a comma
     */
    @Override
    public String toString() {
        return new StringBuilder().append(first).append(",").append(second)
                .toString();
    }

    /**
     * Pairs are equal when both numbers compare equal with {@code ==}.
     */
    @Override
    public boolean equals(Object o) {
        if (o instanceof PrimitivePair) {
            PrimitivePair other = (PrimitivePair)o;
            return first == other.first && second == other.second;
        }
        return false;
    }

    /**
     * Combines the {@link Utilities#doubleHash} values of both numbers.
     */
    @Override
    public int hashCode() {
        final int firstPart = 17 + Utilities.doubleHash(first);
        return firstPart * 31 + Utilities.doubleHash(second);
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/Utilities.java
================================================
package com.etsy.conjecture;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.lang.StringUtils;
import com.google.common.hash.*;
import com.google.common.collect.Lists;

/**
 * Static helper methods for text cleaning and tokenization, a few numeric
 * functions, and ordering of map keys by their values.
 * 
 * The class cannot be instantiated.
 * 
 * @author jattenberg
 * 
 */
public class Utilities {

    /** Absolute tolerance used by {@link #floatingPointEquals(double, double)}. */
    public static final double SMALL = 1e-10;
    /** Shared MD5 hash function. */
    public static final HashFunction HASHER = Hashing.md5();
    /** Square root of two. */
    public static final double ROOT2 = Math.sqrt(2d);
    /** Natural logarithm of two. */
    public static final double LOG2 = Math.log(2.);

    private Utilities() {
    }

    /**
     * Replaces every character that is not an ASCII letter with a space and
     * lower-cases the remaining letters. The result has the same length as
     * the input.
     * 
     * Lower-casing is done per character so the result does not depend on
     * the JVM default locale (String.toLowerCase() maps 'I' to a dotless i
     * under a Turkish locale).
     * 
     * @param line
     *            text to clean, must not be null
     * @return cleaned, lower-cased text
     */
    public static String cleanLine(String line) {
        StringBuilder buffer = new StringBuilder(line.length());
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c < 128 && Character.isLetter(c)) {
                buffer.append(Character.toLowerCase(c));
            } else {
                buffer.append(' ');
            }
        }
        return buffer.toString();
    }

    /**
     * Tokenizes the input on punctuation and whitespace, normalizes hyphens
     * inside each token (runs of hyphens collapse to one, a leading and a
     * trailing hyphen are removed) and joins the surviving tokens with the
     * given separator.
     * 
     * Tokens shorter than two characters are dropped; when ignoreNumbers is
     * set, tokens containing any digit are dropped as well.
     * 
     * @param input
     *            text to clean
     * @param separator
     *            string placed between kept tokens
     * @param ignoreNumbers
     *            whether tokens containing digits are discarded
     * @return the kept tokens joined by separator, without a trailing
     *         separator
     */
    public static String cleanLineRobust(String input, String separator,
            boolean ignoreNumbers) {
        StringBuilder buff = new StringBuilder();
        StringTokenizer tokenizer = new StringTokenizer(input,
                " +.,~\\<>\\$?!:;(){}|" + "\b\t\n\f\r\"\'\\\\/\\=\\&\\%\\_");

        boolean first = true;
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            token = token.replaceAll("-{2,}", "-");
            token = token.replaceAll("^-", "");
            token = token.replaceAll("-$", "");
            if (token.length() < 2
                    || (ignoreNumbers && StringUtils.containsAny(token,
                            "0123456789")))
                continue;
            // Separator goes between tokens, so nothing has to be trimmed
            // off the end afterwards.
            if (!first)
                buff.append(separator);
            buff.append(token);
            first = false;
        }
        return buff.toString();
    }

    /**
     * Validates that the string is not blank.
     * 
     * @param s
     *            string to check
     * @return s itself
     * @throws IllegalArgumentException
     *             if StringUtils.isBlank(s) is true
     */
    public static String checkNotBlank(String s) {
        if (StringUtils.isBlank(s)) {
            throw new IllegalArgumentException("Argument cannot be blank");
        }
        return s;
    }

    /**
     * Validates every element with {@link #checkNotBlank(String)}.
     * 
     * @param S
     *            strings to check
     * @return S itself
     */
    public static List<String> checkNotBlank(List<String> S) {
        for (String s : S)
            checkNotBlank(s);
        return S;
    }

    /**
     * Validates every element with {@link #checkNotBlank(String)}.
     * 
     * @param S
     *            strings to check
     * @return S itself
     */
    public static String[] checkNotBlank(String[] S) {
        for (String s : S)
            checkNotBlank(s);
        return S;
    }

    /**
     * Sums the coefficients of the given tokens. A token that occurs several
     * times in input contributes several times; tokens without a coefficient
     * (absent key or null value) contribute nothing.
     * 
     * @param coefficients
     *            coefficient per token
     * @param input
     *            tokens to score
     * @return sum of the coefficients of the tokens in input
     */
    public static double stringInnerProduct(Map<String, Double> coefficients,
            Collection<String> input) {
        double output = 0;
        for (String token : input) {
            // Single lookup instead of containsKey + get.
            Double coefficient = coefficients.get(token);
            if (coefficient != null)
                output += coefficient;
        }
        return output;
    }

    /**
     * Logistic sigmoid, 1 / (1 + e^-x).
     */
    public static double sigmoid(double operand) {
        return 1. / (1. + Math.exp(-operand));
    }

    /**
     * derivative of the sigmoid function
     * 
     * Computed as s * (1 - s) with s = sigmoid(operand). The algebraically
     * equal form e^x / (1 + e^x)^2 overflows to Infinity / Infinity (NaN)
     * for large positive operands; this form tends to 0 at both extremes.
     */
    public static double dsigmoid(double operand) {
        double s = sigmoid(operand);
        return s * (1. - s);
    }

    /**
     * returns the whitespace-separated terms of input in sorted order,
     * joined by single spaces. Leading and trailing whitespace is ignored.
     * 
     * @param input
     *            whitespace-separated terms
     * @return the sorted terms separated by one space each
     */
    public static String sortTerms(String input) {
        // Split on a whitespace regex but join with a literal space; joining
        // with the regex text itself would emit "\s+" between the terms.
        String[] terms = input.trim().split("\\s+");
        Arrays.sort(terms);
        return StringUtils.join(terms, " ");
    }

    /**
     * Splits input on delim, sorts the pieces and joins them with delim.
     * 
     * Note that delim is interpreted as a regular expression when splitting
     * and as literal text when joining, so it should be a plain delimiter
     * without regex metacharacters.
     * 
     * @param input
     *            delimited terms
     * @param delim
     *            delimiter
     * @return the sorted terms joined by delim
     */
    public static String sortTerms(String input, String delim) {
        String[] terms = input.split(delim);
        Arrays.sort(terms);
        return StringUtils.join(terms, delim);
    }

    /**
     * Tokenizes on punctuation, whitespace, hyphens and digits and keeps the
     * tokens whose length is between 2 and maxlen inclusive. Every kept
     * token is followed by one space, so a non-empty result ends with a
     * space.
     * 
     * @param tmp
     *            text to clean
     * @param maxlen
     *            maximum length of a kept token
     * @return kept tokens, each followed by a space
     */
    public final static String cleanText(String tmp, int maxlen) {

        StringTokenizer tok = new StringTokenizer(tmp,
                " +.,~\\<>\\$?!:;(){}|-0123456789\b\t\n\f\r\"\'\\\\/\\=\\&\\%\\_");
        StringBuilder buff = new StringBuilder();
        while (tok.hasMoreTokens()) {
            String out = tok.nextToken();
            if (out.length() < 2 || out.length() > maxlen)
                continue;
            buff.append(out).append(' ');
        }
        return buff.toString();
    }

    /**
     * Builds n-grams over the whitespace-separated tokens of input.
     * 
     * For every token position, and for every requested size that fits, the
     * gram ending at that token is emitted; its tokens are joined with
     * separator. Output is ordered by end position, then by the order of
     * gramSizes.
     * 
     * @param input
     *            whitespace-separated tokens
     * @param gramSizes
     *            gram lengths to produce
     * @param separator
     *            string placed between the tokens of one gram
     * @return list of grams
     */
    public final static List<String> grams(String input, int[] gramSizes,
            String separator) {
        List<String> out = Lists.newArrayList();
        StringBuilder buff = new StringBuilder();
        String[] tokens = StringUtils.split(input);

        for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i];
            for (int len : gramSizes) {
                // Not enough preceding tokens for a gram of this size yet.
                if (len > i + 1)
                    continue;
                if (len == 1) {
                    out.add(token);
                    continue;
                }
                buff.setLength(0);

                for (int k = len - 1; k > 0; k--)
                    buff.append(tokens[i - k]).append(separator);
                buff.append(token);
                out.add(buff.toString());
            }
        }
        return out;
    }

    /**
     * Compares two doubles with the absolute tolerance {@link #SMALL}.
     * 
     * @return true if |a - b| is strictly smaller than SMALL; false if
     *         either value is NaN
     */
    public static final boolean floatingPointEquals(double a, double b) {
        return Math.abs(a - b) < SMALL;
    }

    /**
     * Hashes a double by folding the two 32-bit halves of its
     * Double.doubleToLongBits representation with XOR.
     */
    public static int doubleHash(double d) {
        long t = Double.doubleToLongBits(d);
        return (int)(t ^ (t >>> 32));
    }

    /**
     * Logistic function; same computation as {@link #sigmoid(double)}.
     */
    public static double logistic(double x) {
        return sigmoid(x);
    }

    /**
     * Orders map entries by value, ascending or (when reverse is set)
     * descending.
     */
    static class ValueComparator<K, V extends Comparable<? super V>> implements
            Comparator<Map.Entry<K, V>> {
        boolean reverse;

        public ValueComparator(boolean reverse) {
            this.reverse = reverse;
        }

        public int compare(Map.Entry<K, V> a, Map.Entry<K, V> b) {
            int res = a.getValue().compareTo(b.getValue());
            return reverse ? -res : res;
        }
    }

    /**
     * Returns the keys of map ordered by ascending value.
     */
    public static <K, V extends Comparable<? super V>> ArrayList<K> orderKeysByValue(
            Map<K, V> map) {
        return orderKeysByValue(map, false);
    }

    /**
     * Returns the keys of map ordered by value.
     * 
     * @param map
     *            map whose keys are ordered
     * @param reverse
     *            false for ascending values, true for descending
     * @return new list holding all keys in value order
     */
    public static <K, V extends Comparable<? super V>> ArrayList<K> orderKeysByValue(
            Map<K, V> map, boolean reverse) {
        ArrayList<Map.Entry<K, V>> entries = new ArrayList<Map.Entry<K, V>>(
                map.entrySet());
        Collections.sort(entries, new ValueComparator<K, V>(reverse));
        ArrayList<K> res = new ArrayList<K>(entries.size());
        for (Map.Entry<K, V> entry : entries) {
            res.add(entry.getKey());
        }
        return res;
    }

    /**
     * Returns up to n keys with the largest values, largest first.
     * 
     * @param map
     *            map whose keys are ranked
     * @param n
     *            maximum number of keys to return; values of zero or less
     *            yield an empty list
     * @return new list with at most n keys
     */
    public static <K, V extends Comparable<? super V>> List<K> topKeysByValue(
            Map<K, V> map, int n) {
        ArrayList<K> keys = orderKeysByValue(map, true);
        // Size the result by what is actually returned, not by n, so that a
        // huge n does not trigger a huge allocation.
        int limit = Math.max(0, Math.min(n, keys.size()));
        return new ArrayList<K>(keys.subList(0, limit));
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/AbstractInstance.java
================================================
package com.etsy.conjecture.data;

import java.util.Collection;
import java.util.List;
import java.util.Map;

/**
 * Base class for instances backed by a {@link StringKeyedVector} of named
 * features plus an instance weight, an id and free-form supporting data.
 * 
 * All mutators return the instance itself typed as T so calls can be
 * chained; T is expected to be the concrete subclass.
 * 
 * Naming convention: {@code add*} methods accumulate into the existing
 * coordinate value via {@link #addToCoordinate(String, double)}, while
 * {@code set*} methods overwrite it via
 * {@link #setCoordinate(String, double)}. Namespaced variants use the
 * coordinate name {@code namespace + SEP + name}.
 */
public abstract class AbstractInstance<T extends AbstractInstance<T>> {

    /** Separator placed between a namespace and a feature name. */
    protected static final String SEP = "___";
    public String id;
    public String supporting_data;
    protected double weight;

    StringKeyedVector vector;

    /** Creates an instance with an empty vector and weight 1.0. */
    public AbstractInstance() {
        this(new StringKeyedVector(), 1.0);
    }

    /** Creates an instance with an empty vector and the given weight. */
    public AbstractInstance(double weight) {
        this(new StringKeyedVector(), weight);
    }

    /** Creates an instance using the given vector and weight 1.0. */
    public AbstractInstance(StringKeyedVector skv) {
        this(skv, 1.0);
    }

    /** Creates an instance using the given vector (not copied here) and weight. */
    public AbstractInstance(StringKeyedVector skv, double weight) {
        this.vector = skv;
        this.weight = weight;
    }

    /** Creates an instance from a feature map and weight 1.0. */
    public AbstractInstance(Map<String, Double> map) {
        this(map, 1.0);
    }

    /** Creates an instance from a feature map and the given weight. */
    public AbstractInstance(Map<String, Double> map, double weight) {
        this.vector = new StringKeyedVector(map);
        this.weight = weight;
    }

    /** Single place for the unchecked self-typed cast used by all mutators. */
    @SuppressWarnings("unchecked")
    private T self() {
        return (T)this;
    }

    /** Sets the instance weight. */
    public T setWeight(double weight) {
        this.weight = weight;
        return self();
    }

    /** @return the instance weight */
    public double getWeight() {
        return weight;
    }

    /** @return the instance id, possibly null */
    public String getId() {
        return id;
    }

    /** @return the backing feature vector */
    public StringKeyedVector getVector() {
        return vector;
    }

    /** Stores free-form supporting data for this instance. */
    public void setSupportingData(String s) {
        supporting_data = s;
    }

    /** @return the supporting data, possibly null */
    public String getSupportingData() {
        return supporting_data;
    }

    /** Delegates to the vector's setCoordinate for the named coordinate. */
    public T setCoordinate(String id, double value) {
        vector.setCoordinate(id, value);
        return self();
    }

    /** Delegates to the vector's addToCoordinate for the named coordinate. */
    public T addToCoordinate(String id, double value) {
        vector.addToCoordinate(id, value);
        return self();
    }

    /** Sets the instance id. */
    public T setId(String id) {
        this.id = id;
        return self();
    }

    // ------------------------------------------------------------------
    // Terms
    // ------------------------------------------------------------------

    /** Adds 1.0 to the coordinate named term. */
    public T addTerm(String term) {
        addTerm(term, 1.);
        return self();
    }

    /** Adds featureWeight to the coordinate named term. */
    public T addTerm(String term, double featureWeight) {
        addToCoordinate(term, featureWeight);
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate for term. */
    public T addTermWithNamespace(String term, String namespace) {
        addTermWithNamespace(term, namespace, 1);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate for term. */
    public T addTermWithNamespace(String term, String namespace,
            double featureWeight) {
        addToCoordinate(namespace + SEP + term, featureWeight);
        return self();
    }

    /** Adds featureWeight to the coordinate of every term. */
    public T addTerms(Collection<String> terms, double featureWeight) {
        for (String term : terms) {
            addToCoordinate(term, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the coordinate of every term. */
    public T addTerms(Collection<String> terms) {
        addTerms(terms, 1.);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate of every term. */
    public T addTermsWithNamespace(Collection<String> terms, String namespace,
            double featureWeight) {
        for (String term : terms) {
            addTermWithNamespace(term, namespace, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate of every term. */
    public T addTermsWithNamespace(Collection<String> terms, String namespace) {
        addTermsWithNamespace(terms, namespace, 1.);
        return self();
    }

    /** Adds featureWeight to the coordinate of every term. */
    public T addTerms(String[] terms, double featureWeight) {
        for (String term : terms) {
            addToCoordinate(term, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the coordinate of every term. */
    public T addTerms(String[] terms) {
        addTerms(terms, 1.);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate of every term. */
    public T addTermsWithNamespace(String[] terms, String namespace,
            double featureWeight) {
        for (String term : terms) {
            addTermWithNamespace(term, namespace, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate of every term. */
    public T addTermsWithNamespace(String[] terms, String namespace) {
        addTermsWithNamespace(terms, namespace, 1.);
        return self();
    }

    /** Adds each map value to the coordinate named by its key. */
    public T addTermsWithWeights(Map<String, Double> termsWithWeights) {
        for (Map.Entry<String, Double> entry : termsWithWeights.entrySet()) {
            addTerm(entry.getKey(), entry.getValue());
        }
        return self();
    }

    /** Adds each map value to the namespaced coordinate named by its key. */
    public T addTermsWithWeightsWithNamespace(
            Map<String, Double> termsWithWeights, String namespace) {
        for (Map.Entry<String, Double> entry : termsWithWeights.entrySet()) {
            addTermWithNamespace(entry.getKey(), namespace, entry.getValue());
        }
        return self();
    }

    // ------------------------------------------------------------------
    // Numeric arrays: element i goes to the coordinate named by index i
    // ------------------------------------------------------------------

    /** Adds array[i] to the coordinate namespace + SEP + i. */
    public T addNumericArrayWithNamespace(double[] array, String namespace) {
        for (int i = 0; i < array.length; i++) {
            addToCoordinate(namespace + SEP + i, array[i]);
        }
        return self();
    }

    /** Adds array[i] to the coordinate named by the decimal index i. */
    public T addNumericArray(double[] array) {
        for (int i = 0; i < array.length; i++) {
            addToCoordinate("" + i, array[i]);
        }
        return self();
    }

    /** Adds array[i] to the coordinate namespace + SEP + i. */
    public T addNumericArrayWithNamespace(Double[] array, String namespace) {
        for (int i = 0; i < array.length; i++) {
            addToCoordinate(namespace + SEP + i, array[i]);
        }
        return self();
    }

    /** Adds array[i] to the coordinate named by the decimal index i. */
    public T addNumericArray(Double[] array) {
        for (int i = 0; i < array.length; i++) {
            addToCoordinate("" + i, array[i]);
        }
        return self();
    }

    /** Adds the i-th value to the coordinate namespace + SEP + i. */
    public T addNumericArrayWithNamespace(List<Double> values, String namespace) {
        // Iterate rather than call get(i) so non-random-access lists stay linear.
        int i = 0;
        for (Double value : values) {
            addToCoordinate(namespace + SEP + i, value);
            i++;
        }
        return self();
    }

    /** Adds the i-th value to the coordinate named by the decimal index i. */
    public T addNumericArray(List<Double> values) {
        int i = 0;
        for (Double value : values) {
            addToCoordinate("" + i, value);
            i++;
        }
        return self();
    }

    /** Overwrites the coordinate namespace + SEP + i with array[i]. */
    public T setNumericArrayWithNamespace(double[] array, String namespace) {
        for (int i = 0; i < array.length; i++) {
            setCoordinate(namespace + SEP + i, array[i]);
        }
        return self();
    }

    /** Overwrites the coordinate named by the decimal index i with array[i]. */
    public T setNumericArray(double[] array) {
        for (int i = 0; i < array.length; i++) {
            setCoordinate("" + i, array[i]);
        }
        return self();
    }

    /** Overwrites the coordinate namespace + SEP + i with array[i]. */
    public T setNumericArrayWithNamespace(Double[] array, String namespace) {
        for (int i = 0; i < array.length; i++) {
            setCoordinate(namespace + SEP + i, array[i]);
        }
        return self();
    }

    /** Overwrites the coordinate named by the decimal index i with array[i]. */
    public T setNumericArray(Double[] array) {
        for (int i = 0; i < array.length; i++) {
            setCoordinate("" + i, array[i]);
        }
        return self();
    }

    /** Overwrites the coordinate namespace + SEP + i with the i-th value. */
    public T setNumericArrayWithNamespace(List<Double> values, String namespace) {
        int i = 0;
        for (Double value : values) {
            setCoordinate(namespace + SEP + i, value);
            i++;
        }
        return self();
    }

    /** Overwrites the coordinate named by the decimal index i with the i-th value. */
    public T setNumericArray(List<Double> values) {
        int i = 0;
        for (Double value : values) {
            setCoordinate("" + i, value);
            i++;
        }
        return self();
    }

    // ------------------------------------------------------------------
    // Id fields: the coordinate name is the decimal form of the id
    // ------------------------------------------------------------------

    /** Adds featureWeight to the coordinate named by id. */
    public T addIdField(long id, double featureWeight) {
        addToCoordinate("" + id, featureWeight);
        return self();
    }

    /** Adds 1.0 to the coordinate named by id. */
    public T addIdField(long id) {
        addIdField(id, 1.);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate named by id. */
    public T addIdFieldWithNamespace(long id, double featureWeight,
            String namespace) {
        addToCoordinate(namespace + SEP + id, featureWeight);
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate named by id. */
    public T addIdFieldWithNamespace(long id, String namespace) {
        addIdFieldWithNamespace(id, 1., namespace);
        return self();
    }

    /** Adds featureWeight to the coordinate named by id. */
    public T addIdField(int id, double featureWeight) {
        addToCoordinate("" + id, featureWeight);
        return self();
    }

    /** Adds 1.0 to the coordinate named by id. */
    public T addIdField(int id) {
        addIdField(id, 1.);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate named by id. */
    public T addIdFieldWithNamespace(int id, double featureWeight,
            String namespace) {
        addToCoordinate(namespace + SEP + id, featureWeight);
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate named by id. */
    public T addIdFieldWithNamespace(int id, String namespace) {
        addIdFieldWithNamespace(id, 1., namespace);
        return self();
    }

    /** Adds featureWeight to the coordinate of every id. */
    public T addIds(long[] ids, double featureWeight) {
        for (long id : ids) {
            addToCoordinate("" + id, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the coordinate of every id. */
    public T addIds(long[] ids) {
        addIds(ids, 1.);
        return self();
    }

    /** Adds featureWeight to the coordinate of every id. */
    public T addIds(int[] ids, double featureWeight) {
        for (int id : ids) {
            addToCoordinate("" + id, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the coordinate of every id. */
    public T addIds(int[] ids) {
        addIds(ids, 1.);
        return self();
    }

    /** Adds featureWeight to the coordinate of every id; null elements are not allowed. */
    public T addIds(Collection<Integer> ids, double featureWeight) {
        for (long id : ids) {
            addToCoordinate("" + id, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the coordinate of every id. */
    public T addIds(Collection<Integer> ids) {
        addIds(ids, 1.);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate of every id. */
    public T addIdsWithNamespace(long[] ids, double featureWeight,
            String namespace) {
        for (long id : ids) {
            addToCoordinate(namespace + SEP + id, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate of every id. */
    public T addIdsWithNamespace(long[] ids, String namespace) {
        addIdsWithNamespace(ids, 1., namespace);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate of every id. */
    public T addIdsWithNamespace(int[] ids, double featureWeight,
            String namespace) {
        for (int id : ids) {
            addToCoordinate(namespace + SEP + id, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate of every id. */
    public T addIdsWithNamespace(int[] ids, String namespace) {
        addIdsWithNamespace(ids, 1., namespace);
        return self();
    }

    /** Adds featureWeight to the namespaced coordinate of every id. */
    public T addIdsWithNamespace(Collection<Long> ids, double featureWeight,
            String namespace) {
        for (Long id : ids) {
            addToCoordinate(namespace + SEP + id, featureWeight);
        }
        return self();
    }

    /** Adds 1.0 to the namespaced coordinate of every id. */
    public T addIdsWithNamespace(Collection<Long> ids, String namespace) {
        addIdsWithNamespace(ids, 1., namespace);
        return self();
    }

    /** Overwrites the coordinate named by id with featureWeight. */
    public T setIdField(long id, double featureWeight) {
        setCoordinate("" + id, featureWeight);
        return self();
    }

    /** Overwrites the coordinate named by id with 1.0. */
    public T setIdField(long id) {
        setIdField(id, 1.);
        return self();
    }

    /** Overwrites the namespaced coordinate named by id with featureWeight. */
    public T setIdFieldWithNamespace(long id, double featureWeight,
            String namespace) {
        setCoordinate(namespace + SEP + id, featureWeight);
        return self();
    }

    /** Overwrites the namespaced coordinate named by id with 1.0. */
    public T setIdFieldWithNamespace(long id, String namespace) {
        setIdFieldWithNamespace(id, 1., namespace);
        return self();
    }

    /** Overwrites the coordinate named by id with featureWeight. */
    public T setIdField(int id, double featureWeight) {
        setCoordinate("" + id, featureWeight);
        return self();
    }

    /** Overwrites the coordinate named by id with 1.0. */
    public T setIdField(int id) {
        setIdField(id, 1.);
        return self();
    }

    /** Overwrites the namespaced coordinate named by id with featureWeight. */
    public T setIdFieldWithNamespace(int id, double featureWeight,
            String namespace) {
        setCoordinate(namespace + SEP + id, featureWeight);
        return self();
    }

    /** Overwrites the namespaced coordinate named by id with 1.0. */
    public T setIdFieldWithNamespace(int id, String namespace) {
        setIdFieldWithNamespace(id, 1., namespace);
        return self();
    }

    /** Overwrites the coordinate of every id with featureWeight. */
    public T setIds(long[] ids, double featureWeight) {
        for (long id : ids) {
            setCoordinate("" + id, featureWeight);
        }
        return self();
    }

    /** Overwrites the coordinate of every id with 1.0. */
    public T setIds(long[] ids) {
        setIds(ids, 1.);
        return self();
    }

    /** Overwrites the coordinate of every id with featureWeight. */
    public T setIds(int[] ids, double featureWeight) {
        for (int id : ids) {
            setCoordinate("" + id, featureWeight);
        }
        return self();
    }

    /** Overwrites the coordinate of every id with 1.0. */
    public T setIds(int[] ids) {
        setIds(ids, 1.);
        return self();
    }

    /** Overwrites the coordinate of every id with featureWeight; null elements are not allowed. */
    public T setIds(Collection<Integer> ids, double featureWeight) {
        for (long id : ids) {
            setCoordinate("" + id, featureWeight);
        }
        return self();
    }

    /** Overwrites the coordinate of every id with 1.0. */
    public T setIds(Collection<Integer> ids) {
        setIds(ids, 1.);
        return self();
    }

    /** Overwrites the namespaced coordinate of every id with featureWeight. */
    public T setIdsWithNamespace(long[] ids, double featureWeight,
            String namespace) {
        for (long id : ids) {
            setCoordinate(namespace + SEP + id, featureWeight);
        }
        return self();
    }

    /** Overwrites the namespaced coordinate of every id with 1.0. */
    public T setIdsWithNamespace(long[] ids, String namespace) {
        setIdsWithNamespace(ids, 1., namespace);
        return self();
    }

    /** Overwrites the namespaced coordinate of every id with featureWeight. */
    public T setIdsWithNamespace(int[] ids, double featureWeight,
            String namespace) {
        for (int id : ids) {
            setCoordinate(namespace + SEP + id, featureWeight);
        }
        return self();
    }

    /** Overwrites the namespaced coordinate of every id with 1.0. */
    public T setIdsWithNamespace(int[] ids, String namespace) {
        setIdsWithNamespace(ids, 1., namespace);
        return self();
    }

    /** Overwrites the namespaced coordinate of every id with featureWeight. */
    public T setIdsWithNamespace(Collection<Long> ids, double featureWeight,
            String namespace) {
        for (Long id : ids) {
            setCoordinate(namespace + SEP + id, featureWeight);
        }
        return self();
    }

    /** Overwrites the namespaced coordinate of every id with 1.0. */
    public T setIdsWithNamespace(Collection<Long> ids, String namespace) {
        setIdsWithNamespace(ids, 1., namespace);
        return self();
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/data/BinaryLabel.java
================================================
package com.etsy.conjecture.data;

import static com.google.common.base.Preconditions.checkArgument;

/**
 * A real-valued label whose value is restricted to the closed interval
 * [0, 1].
 */
public class BinaryLabel extends RealValuedLabel {

    private static final long serialVersionUID = 1L;

    /** Creates a label with value 0.0. */
    public BinaryLabel() {
        super(0.0);
    }

    /**
     * Creates a label with the given value.
     *
     * @param value
     *            label value; rejected by checkArgument unless it lies in
     *            [0, 1]
     */
    public BinaryLabel(double value) {
        super(checkBinaryValue(value));
    }

    /**
     * Validates that the value lies in [0, 1] and hands it back unchanged so
     * it can be used inline in the super(...) call.
     */
    private static double checkBinaryValue(double value) {
        final boolean withinUnitInterval = value >= 0 && value <= 1;
        checkArgument(withinUnitInterval,
                "value must be in [0, 1], given: %s", value);
        return value;
    }

    /**
     * Maps the label from the {0, +1} convention onto {-1, +1}.
     */
    public double getAsPlusMinus() {
        final double centred = getValue() - 0.5;
        return 2.0 * centred;
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/BinaryLabeledInstance.java
================================================
package com.etsy.conjecture.data;

import java.util.Map;

/**
 * TODO: when using method string all methods return a RealValueLabeledInstance
 * think about how to avoid this while not using generic types
 */
public class BinaryLabeledInstance extends
        AbstractInstance<BinaryLabeledInstance> implements
        LabeledInstance<BinaryLabel> {

    protected BinaryLabel label;

    public BinaryLabel getLabel() {
        return label;
    }

    public BinaryLabeledInstance() {
        this(new BinaryLabel(0.0), 1.0);
    }

    public BinaryLabeledInstance(double label, Map<String, Double> instance) {
        this(new BinaryLabel(label), instance, 1.0);
    }

    public BinaryLabeledInstance(double label, Map<String, Double> instance,
            double weight) {
        this(new BinaryLabel(label), instance, weight);
    }

    public BinaryLabeledInstance(double label, StringKeyedVector vec) {
        this(new BinaryLabel(label), vec.getMap(), 1.0);
    }

    public BinaryLabeledInstance(double label, StringKeyedVector vec,
            double weight) {
        this(new BinaryLabel(label), vec.getMap(), weight);
    }

    public BinaryLabeledInstance(BinaryLabel label, Map<String, Double> instance) {
        this(label, instance, 1.0);
    }

    public BinaryLabeledInstance(BinaryLabel label,
            Map<String, Double> instance, double weight) {
        super(instance, weight);
        this.label = label;
    }

    public BinaryLabeledInstance(BinaryLabel label, StringKeyedVector vec) {
        this(label, vec.getMap(), 1.0);
    }

    public BinaryLabeledInstance(BinaryLabel label, StringKeyedVector vec,
            double weight) {
        this(label, vec.getMap(), weight);
    }

    public BinaryLabeledInstance(double label) {
        this(new BinaryLabel(label), 1.0);
    }

    public BinaryLabeledInstance(double label, double weight) {
        this(new BinaryLabel(label), weight);
    }

    public BinaryLabeledInstance(BinaryLabel label) {
        this(label, 1.0);
    }

    public BinaryLabeledInstance(BinaryLabel label, double weight) {
        super(weight);
        this.label = label;
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/data/ByteArrayDoubleHashMap.java
================================================
package com.etsy.conjecture.data;

import gnu.trove.function.TDoubleFunction;
import gnu.trove.iterator.TObjectDoubleIterator;
import gnu.trove.map.hash.TObjectDoubleHashMap;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.AbstractMap;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

/**
 * A String-to-double map that stores its keys as encoded byte arrays inside a
 * Trove primitive-valued hash map. It exposes both the boxed
 * {@link Map} view and primitive accessors ({@code getPrimitive},
 * {@code putPrimitive}, {@code removePrimitive}) that avoid boxing.
 *
 * Lookups of absent keys return the configured default value rather than
 * null.
 */
public class ByteArrayDoubleHashMap implements Serializable, KryoSerializable,
        Iterable<Map.Entry<String, Double>>, Map<String, Double> {

    private static final long serialVersionUID = -7070522686694887436L;

    // - represent the sparse map by a mapping of coordinate name strings
    // (feature names), stored as encoded bytes, to doubles.
    protected TObjectDoubleHashMap<byte[]> map;

    // charset name used to convert between String keys and byte[] keys.
    protected String keyEncoding;
    protected float loadFactor;
    // value reported by the underlying map for keys that are not present.
    protected double defaultValue;

    /** Capacity 10, load factor 0.8, default value 0.0, ASCII keys. */
    public ByteArrayDoubleHashMap() {
        this(10, 0.8f, 0.0);
    }

    /** Same as the four-argument constructor with "ASCII" key encoding. */
    public ByteArrayDoubleHashMap(int initialCapacity, float loadFactor,
            double defaultValue) {
        this(initialCapacity, loadFactor, "ASCII", defaultValue);
    }

    /**
     * @param initialCapacity
     *            initial capacity handed to the underlying Trove map
     * @param loadFactor
     *            load factor handed to the underlying Trove map
     * @param keyEncoding
     *            charset name used to encode/decode String keys
     * @param defaultValue
     *            value returned for absent keys
     */
    public ByteArrayDoubleHashMap(int initialCapacity, float loadFactor,
            String keyEncoding, double defaultValue) {
        this.map = new TByteArrayDoubleHashMap(initialCapacity, loadFactor,
                defaultValue);
        this.keyEncoding = keyEncoding;
        this.loadFactor = loadFactor;
        this.defaultValue = defaultValue;
    }

    /**
     * Decodes a stored key using the configured encoding. Prints the stack
     * trace and returns null if the encoding is not supported.
     */
    public String byteArrayToString(byte[] b) {
        try {
            return new String(b, keyEncoding);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Encodes a String key using the configured encoding. Prints the stack
     * trace and returns null if the encoding is not supported.
     */
    public byte[] stringToByteArray(String s) {
        try {
            return s.getBytes(keyEncoding);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Customized trove hashmap which does both: customized hash/equality
     * functions, and also storing the values as a primitive array.
     */
    static class TByteArrayDoubleHashMap extends TObjectDoubleHashMap<byte[]> {
        public TByteArrayDoubleHashMap(int initialSize, float loadFactor,
                double defaultValue) {
            super(initialSize, loadFactor, defaultValue);
        }

        // hash on array contents, not on array identity.
        protected int hash(Object obj) {
            return Arrays.hashCode((byte[])obj);
        }

        // compare array contents; the REMOVED sentinel never matches.
        protected boolean equals(Object a, Object b) {
            return b != null && b != REMOVED
                    && Arrays.equals((byte[])a, (byte[])b);
        }

        // - override this to prevent doubling on resize.
        public double put(byte[] key, double value) {
            int index = insertKey(key);
            double previous = 0.0;
            boolean isNewMapping = true;
            if (index < 0) {
                // negative index: key already present at (-index - 1).
                index = -index - 1;
                previous = _values[index];
                isNewMapping = false;
            }
            _values[index] = value;
            if (isNewMapping) {
                postInsertHook2(consumeFreeSlot);
            }

            return previous;
        }

        /**
         * Bookkeeping after inserting a new key: grows the table to roughly
         * 1.2x its capacity (plus 10, rounded up to a prime) when the size
         * limit is exceeded, or rehashes at the same capacity when no free
         * slots remain.
         */
        protected final void postInsertHook2(boolean usedFreeSlot) {
            if (usedFreeSlot) {
                _free--;
            }

            if (++_size > _maxSize || _free == 0) {
                int newCapacity = _size > _maxSize ? gnu.trove.impl.PrimeFinder
                        .nextPrime((int)(capacity() * 1.2) + 10) : capacity();
                if (newCapacity > 1000000) {
                    System.out.println("rehashing to size: " + newCapacity
                            + " from " + capacity());
                }
                rehash(newCapacity);
                computeMaxSize(capacity());
            }
        }
    }

    public int size() {
        return map.size();
    }

    /**
     * Accepts either an already-encoded byte[] key or a String key.
     *
     * @throws IllegalArgumentException
     *             if the key is of any other class
     */
    public boolean containsKey(Object key) {
        if (key instanceof byte[]) {
            return map.containsKey(key);
        } else if (key instanceof String) {
            return map.containsKey(stringToByteArray((String)key));
        } else {
            throw new IllegalArgumentException("class "
                    + key.getClass().toString()
                    + " is not valid for ByteArrayDoubleHashMap.containsKey");
        }
    }

    /** Returns a freshly built set of decoded keys (a copy, not a view). */
    public Set<String> keySet() {
        Set<String> res = new HashSet<String>();
        for (byte[] b : map.keySet()) {
            res.add(byteArrayToString(b));
        }
        return res;
    }

    /** Returns a freshly built set of the distinct values (a copy). */
    public Set<Double> values() {
        Set<Double> values = new HashSet<Double>();
        for (Map.Entry<String, Double> e : this) {
            values.add(e.getValue());
        }
        return values;
    }

    public boolean containsValue(Object d) {
        return values().contains((Double)d);
    }

    /** Returns a freshly built set of immutable entries (a copy). */
    public Set<Map.Entry<String, Double>> entrySet() {
        Set<Map.Entry<String, Double>> entries = new HashSet<Map.Entry<String, Double>>();
        for (Map.Entry<String, Double> e : this) {
            entries.add(e);
        }
        return entries;
    }

    /**
     * @return true when the map holds no entries (previously this returned
     *         the opposite, violating the Map contract)
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    public void clear() {
        map.clear();
    }

    // The Map-interface accessors below expect String keys and delegate to
    // the primitive variants.

    public Double remove(Object k) {
        return removePrimitive((String)k);
    }

    public Double get(Object k) {
        return getPrimitive((String)k);
    }

    public Double put(String key, Double value) {
        return putPrimitive(key, value);
    }

    public void putAll(Map<? extends String, ? extends Double> m) {
        for (Map.Entry<? extends String, ? extends Double> e : m.entrySet()) {
            put((String)e.getKey(), (Double)e.getValue());
        }
    }

    public double getPrimitive(byte[] key) {
        return map.get(key);
    }

    public double getPrimitive(String key) {
        return map.get(stringToByteArray(key));
    }

    public double putPrimitive(byte[] key, double value) {
        return map.put(key, value);
    }

    public double putPrimitive(String key, double value) {
        return map.put(stringToByteArray(key), value);
    }

    public double removePrimitive(byte[] key) {
        return map.remove(key);
    }

    public double removePrimitive(String key) {
        return map.remove(stringToByteArray(key));
    }

    public void transformValues(TDoubleFunction func) {
        map.transformValues(func);
    }

    /** Direct iterator over the underlying byte[]-keyed Trove map. */
    public TObjectDoubleIterator<byte[]> troveIterator() {
        return map.iterator();
    }

    /**
     * Iterates the entries with keys decoded to Strings. Each call to next()
     * yields a new immutable entry.
     */
    public Iterator<Map.Entry<String, Double>> iterator() {
        return new Iterator<Map.Entry<String, Double>>() {
            private TObjectDoubleIterator<byte[]> iter = troveIterator();

            public boolean hasNext() {
                return iter.hasNext();
            }

            public void remove() {
                iter.remove();
            }

            public Map.Entry<String, Double> next() {
                iter.advance();
                return new AbstractMap.SimpleImmutableEntry<String, Double>(
                        byteArrayToString(iter.key()), iter.value());
            }
        };
    }

    // - java serialization
    // layout: encoding, load factor, default value, entry count, then for
    // each entry: key length, key bytes, value.
    private void writeObject(ObjectOutputStream output) throws IOException {
        output.writeObject(keyEncoding);
        output.writeFloat(loadFactor);
        output.writeDouble(defaultValue);
        output.writeInt(map.size());
        for (TObjectDoubleIterator<byte[]> it = map.iterator(); it.hasNext();) {
            it.advance();
            byte[] key = it.key();
            output.writeInt(key.length);
            for (int i = 0; i < key.length; i++) {
                output.writeByte(key[i]);
            }
            output.writeDouble(it.value());
        }
    }

    private void readObject(ObjectInputStream input) throws IOException,
            ClassNotFoundException {
        keyEncoding = (String)input.readObject();
        loadFactor = input.readFloat();
        defaultValue = input.readDouble();
        int size = input.readInt();
        map = new TByteArrayDoubleHashMap(size, loadFactor, defaultValue);
        for (int i = 0; i < size; i++) {
            int length = input.readInt();
            byte[] key = new byte[length];
            for (int j = 0; j < length; j++) {
                key[j] = input.readByte();
            }
            double value = input.readDouble();
            map.put(key, value);
        }
    }

    // - kryo serialization for use in scalding.
    // same field order as the java serialization above.
    public void write(Kryo kryo, Output output) {
        output.writeString(keyEncoding);
        output.writeFloat(loadFactor);
        output.writeDouble(defaultValue);
        output.writeInt(map.size());
        for (TObjectDoubleIterator<byte[]> it = map.iterator(); it.hasNext();) {
            it.advance();
            byte[] key = it.key();
            output.writeInt(key.length);
            for (int i = 0; i < key.length; i++) {
                output.writeByte(key[i]);
            }
            output.writeDouble(it.value());
        }
    }

    public void read(Kryo kryo, Input input) {
        keyEncoding = input.readString();
        loadFactor = input.readFloat();
        defaultValue = input.readDouble();
        int size = input.readInt();
        map = new TByteArrayDoubleHashMap(size, loadFactor, defaultValue);
        for (int i = 0; i < size; i++) {
            int length = input.readInt();
            byte[] key = new byte[length];
            for (int j = 0; j < length; j++) {
                key[j] = input.readByte();
            }
            double value = input.readDouble();
            map.put(key, value);
        }
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/ClusterLabel.java
================================================
package com.etsy.conjecture.data;

/**
 * A label that names a cluster by a (possibly null) string.
 */
public class ClusterLabel extends Label{

    private static final long serialVersionUID = 1L;

    /** Cluster name; null until one is assigned. */
    protected String label;

    /** Creates a label with no cluster name. */
    public ClusterLabel() {
        this(null);
    }

    public ClusterLabel(String label) {
        this.label = label;
    }

    public String getLabel() {
        return this.label;
    }

    public void setLabel(String label) {
        this.label = label;
    }

    /** @return the cluster name itself (null when none is set) */
    public String toString() {
        return label;
    }

    @Override
    public int hashCode() {
        // Same value as the usual 31 * 1 + hash(label) recipe.
        final int labelHash = (label == null) ? 0 : label.hashCode();
        return 31 + labelHash;
    }

    /**
     * Two labels are equal when they are of exactly the same class and carry
     * equal (or both null) cluster names.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final ClusterLabel that = (ClusterLabel) obj;
        return (label == null) ? that.label == null : label.equals(that.label);
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/ClusterPrediction.java
================================================
package com.etsy.conjecture.data;

import java.util.Map;
import com.google.common.collect.Maps;

/**
 * Representing a probability of membership in each cluster
 */
public class ClusterPrediction extends ClusterLabel{

    private static final long serialVersionUID = -1L;

    /**
     * Cluster membership probabilities
     */
    private Map<String, Double> clusterProbs;

    /**
     * Copies the given probabilities and sets this prediction's label to the
     * cluster with the largest probability. The first cluster encountered
     * wins ties; the label stays null when the map is empty.
     */
    public ClusterPrediction(Map<String, Double> clusterProbs) {
        this.clusterProbs = Maps.newHashMap(clusterProbs);
        String bestCluster = null;
        double bestProb = 0;
        boolean seenAny = false;
        for (Map.Entry<String, Double> candidate : clusterProbs.entrySet()) {
            final double prob = candidate.getValue();
            if (!seenAny || prob > bestProb) {
                bestProb = prob;
                bestCluster = candidate.getKey();
                seenAny = true;
            }
        }
        setLabel(bestCluster);
    }

    /** @return the internal copy of the membership probabilities */
    public Map<String,Double> getMap() {
        return clusterProbs;
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/data/Instance.java
================================================
package com.etsy.conjecture.data;


//TODO: reset methods for string adders
//TODO: for instance, vector subtraction?
/**
 * Concrete, unlabeled instance: an AbstractInstance bound to its own type.
 */
public class Instance extends AbstractInstance<Instance> {

    /** Delegates to the no-argument AbstractInstance constructor. */
    public Instance() {
    }

    /** Delegates to the AbstractInstance constructor taking a vector. */
    public Instance(StringKeyedVector vec) {
        super(vec);
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/data/InstanceFactory.java
================================================
package com.etsy.conjecture.data;

/**
 * Static helpers for creating instances and for attaching labels to an
 * existing {@link Instance}.
 */
public class InstanceFactory {

    /** Static utility holder; not meant to be instantiated. */
    private InstanceFactory() {
    }

    /** @return a new, empty Instance */
    public static Instance buildInstance() {
        return new Instance();
    }

    /**
     * Builds a new Instance from the vector of an existing one.
     * NOTE(review): whether the vector is copied or shared depends on the
     * Instance(StringKeyedVector) constructor -- confirm.
     */
    public static Instance copyInstance(Instance inst) {
        return new Instance(inst.getVector());
    }

    // ---- binary labels ----

    public static BinaryLabeledInstance toBinaryLabeledInstance(double label,
            Instance instance) {
        return new BinaryLabeledInstance(label, instance.getVector());
    }

    public static BinaryLabeledInstance toBinaryLabeledInstance(
            BinaryLabel label, Instance instance) {
        return new BinaryLabeledInstance(label, instance.getVector());
    }

    // ---- real-valued labels ----

    public static RealValueLabeledInstance toRealValueLabeledInstance(
            double label, Instance instance) {
        return new RealValueLabeledInstance(label, instance.getVector());
    }

    public static RealValueLabeledInstance toRealValueLabeledInstance(
            RealValuedLabel label, Instance instance) {
        return new RealValueLabeledInstance(label, instance.getVector());
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/InstanceInterface.java
================================================
package com.etsy.conjecture.data;

import java.util.Collection;
import java.util.List;
import java.util.Map;

/**
 * Fluent feature-building contract: apart from getId(), every method returns
 * the implementing type T so calls can be chained.
 */
public interface InstanceInterface<T extends InstanceInterface<T>> {

    // ---- identity ----

    String getId();

    T setId(String id);

    // ---- terms ----

    T addTerm(String term);

    T addTerm(String term, double featureWeight);

    T addTermWithNamespace(String term, String namespace);

    T addTermWithNamespace(String term, String namespace, double featureWeight);

    T addTerms(Collection<String> terms, double featureWeight);

    T addTerms(Collection<String> terms);

    T addTermsWithNamespace(Collection<String> terms, String namespace,
            double featureWeight);

    T addTermsWithNamespace(Collection<String> terms, String namespace);

    T addTerms(String[] terms, double featureWeight);

    T addTerms(String[] terms);

    T addTermsWithNamespace(String[] terms, String namespace,
            double featureWeight);

    T addTermsWithNamespace(String[] terms, String namespace);

    T addTermsWithWeights(Map<String, Double> termsWithWeights);

    T addTermsWithWeightsWithNamespace(Map<String, Double> termsWithWeights,
            String namespace);

    // ---- numeric arrays: add ----

    T addNumericArrayWithNamespace(double[] array, String namespace);

    T addNumericArray(double[] array);

    T addNumericArrayWithNamespace(Double[] array, String namespace);

    T addNumericArray(Double[] array);

    T addNumericArrayWithNamespace(List<Double> values, String namespace);

    T addNumericArray(List<Double> values);

    // ---- numeric arrays: set ----

    T setNumericArrayWithNamespace(double[] array, String namespace);

    T setNumericArray(double[] array);

    T setNumericArrayWithNamespace(Double[] array, String namespace);

    T setNumericArray(Double[] array);

    T setNumericArrayWithNamespace(List<Double> values, String namespace);

    T setNumericArray(List<Double> values);

    // ---- single id fields: add ----

    T addIdField(long id, double featureWeight);

    T addIdField(long id);

    T addIdFieldWithNamespace(long id, double featureWeight, String namespace);

    T addIdFieldWithNamespace(long id, String namespace);

    T addIdField(int id, double featureWeight);

    T addIdField(int id);

    T addIdFieldWithNamespace(int id, double featureWeight, String namespace);

    T addIdFieldWithNamespace(int id, String namespace);

    // ---- multiple ids: add ----

    T addIds(long[] ids, double featureWeight);

    T addIds(long[] ids);

    T addIds(int[] ids, double featureWeight);

    T addIds(int[] ids);

    T addIds(Collection<Integer> ids, double featureWeight);

    T addIds(Collection<Integer> ids);

    T addIdsWithNamespace(long[] ids, double featureWeight, String namespace);

    T addIdsWithNamespace(long[] ids, String namespace);

    T addIdsWithNamespace(int[] ids, double featureWeight, String namespace);

    T addIdsWithNamespace(int[] ids, String namespace);

    T addIdsWithNamespace(Collection<Long> ids, double featureWeight,
            String namespace);

    T addIdsWithNamespace(Collection<Long> ids, String namespace);

    // ---- single id fields: set ----

    T setIdField(long id, double featureWeight);

    T setIdField(long id);

    T setIdFieldWithNamespace(long id, double featureWeight, String namespace);

    T setIdFieldWithNamespace(long id, String namespace);

    T setIdField(int id, double featureWeight);

    T setIdField(int id);

    T setIdFieldWithNamespace(int id, double featureWeight, String namespace);

    T setIdFieldWithNamespace(int id, String namespace);

    // ---- multiple ids: set ----

    T setIds(long[] ids, double featureWeight);

    T setIds(long[] ids);

    T setIds(int[] ids, double featureWeight);

    T setIds(int[] ids);

    T setIds(Collection<Integer> ids, double featureWeight);

    T setIds(Collection<Integer> ids);

    T setIdsWithNamespace(long[] ids, double featureWeight, String namespace);

    T setIdsWithNamespace(long[] ids, String namespace);

    T setIdsWithNamespace(int[] ids, double featureWeight, String namespace);

    T setIdsWithNamespace(int[] ids, String namespace);

    T setIdsWithNamespace(Collection<Long> ids, double featureWeight,
            String namespace);

    T setIdsWithNamespace(Collection<Long> ids, String namespace);

}


================================================
FILE: src/main/java/com/etsy/conjecture/data/Label.java
================================================
package com.etsy.conjecture.data;

/**
 * Stateless, serializable base class for labels.
 */
public class Label implements java.io.Serializable {

    private static final long serialVersionUID = 1L;

    /** Nothing to initialise. */
    public Label() {
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/LabeledInstance.java
================================================
package com.etsy.conjecture.data;

/**
 * Read-only view of a training example: a label of type L, a feature vector
 * and a weight.
 */
public interface LabeledInstance<L extends Label> {

    /** @return the label attached to this instance */
    L getLabel();

    /** @return the feature vector of this instance */
    StringKeyedVector getVector();

    /** @return the weight of this instance */
    double getWeight();
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/LazyVector.java
================================================
package com.etsy.conjecture.data;

import gnu.trove.function.TDoubleFunction;
import gnu.trove.iterator.TObjectDoubleIterator;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.etsy.conjecture.Utilities;

/**
 * A StringKeyedVector whose coordinates are updated lazily. Each coordinate
 * remembers the iteration at which it was last brought up to date; when it
 * is next touched, the configured {@link UpdateFunction} is applied to cover
 * the iterations in between.
 */
public class LazyVector extends StringKeyedVector implements Serializable,
        KryoSerializable {

    private static final long serialVersionUID = -7070522686694887436L;

    // per-coordinate iteration number at which the stored value was last
    // brought up to date (stored as doubles); rebuilt on deserialization.
    protected transient ByteArrayDoubleHashMap iterations;

    // current global iteration.
    protected long iteration = 0;

    protected UpdateFunction updater;

    /**
     * The function used to update the parameters during the lazy update
     */
    public static interface UpdateFunction extends Serializable {
        public double lazyUpdate(String key, double param, long startIteration,
                long endIteration);
    }

    /** Creates a vector whose lazy update leaves parameters unchanged. */
    public LazyVector() {
        this(new UpdateFunction() {
            private static final long serialVersionUID = 1740773207106961880L;

            public double lazyUpdate(String key, double p, long a, long b) {
                return p;
            }
        });
    }

    public LazyVector(UpdateFunction uf) {
        this(10, uf);
    }

    public LazyVector(int initialCapacity, UpdateFunction uf) {
        super(initialCapacity);
        iterations = new ByteArrayDoubleHashMap(initialCapacity, LOAD_FACTOR,
                FEATURE_ENCODING, 0.0);
        updater = uf;
    }

    /**
     * Wraps the storage of an existing vector (the underlying map is shared,
     * not copied). A lazy source vector is fully delazified first.
     */
    public LazyVector(StringKeyedVector skv, UpdateFunction uf) {
        if (skv instanceof LazyVector) {
            ((LazyVector)skv).delazify();
        }
        this.vector = skv.vector;
        iterations = new ByteArrayDoubleHashMap(skv.size(), LOAD_FACTOR,
                FEATURE_ENCODING, 0.0);
        updater = uf;
    }

    public LazyVector(ByteArrayDoubleHashMap map, UpdateFunction uf) {
        super(map);
        iterations = new ByteArrayDoubleHashMap(10, LOAD_FACTOR,
                FEATURE_ENCODING, 0.0);
        updater = uf;
    }

    public LazyVector(Map<String, Double> jmap, UpdateFunction uf) {
        super(jmap);
        iterations = new ByteArrayDoubleHashMap(10, LOAD_FACTOR,
                FEATURE_ENCODING, 0.0);
        updater = uf;
    }

    public void incrementIteration() {
        iteration++;
    }

    /**
     * Brings every coordinate up to the current iteration and then drops
     * coordinates that became zero.
     */
    public void delazify() {
        for (TObjectDoubleIterator<byte[]> it = vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            // coordinates without a recorded iteration report the default
            // value of the iterations map.
            long startIter = (long)iterations.getPrimitive(it.key());
            if (startIter < iteration) {
                // decode the key: byte[].toString() would only yield an
                // identity string such as "[B@1a2b", not the feature name.
                String name = vector.byteArrayToString(it.key());
                it.setValue(updater.lazyUpdate(name, it.value(), startIter,
                        iteration));
                iterations.putPrimitive(it.key(), (double)iteration);
            }
        }
        removeZeroCoordinates();
    }

    public double delazifyCoordinate(String key) {
        return delazifyCoordinate(vector.stringToByteArray(key));
    }

    /**
     * Brings a single coordinate up to the current iteration.
     *
     * @return the up-to-date value, or 0.0 if the coordinate is absent
     */
    public double delazifyCoordinate(byte[] key) {
        if (vector.containsKey(key)) {
            long oldIteration = (long)iterations.getPrimitive(key);
            double initial = vector.getPrimitive(key);
            if (oldIteration < iteration) {
                // pass the decoded feature name, not the array's identity
                // string.
                double updated = updater.lazyUpdate(
                        vector.byteArrayToString(key), initial, oldIteration,
                        iteration);
                if (Utilities.floatingPointEquals(updated, 0.0d)) {
                    vector.removePrimitive(key);
                    iterations.removePrimitive(key);
                } else {
                    iterations.putPrimitive(key, (double)iteration);
                    vector.putPrimitive(key, updated);
                }
                return updated;
            } else {
                return initial;
            }
        }
        return 0.0;
    }

    /**
     * Applies all pending updates up to the current iteration, then jumps to
     * the given iteration without applying the updates in between.
     */
    public void skipToIteration(long iter) {
        delazify();
        iteration = iter;
        for (TObjectDoubleIterator<byte[]> it = iterations.troveIterator(); it
                .hasNext();) {
            it.advance();
            it.setValue((double)iter);
        }
        // Coordinates with no recorded iteration (vectors built from an
        // existing map, or freshly deserialized) would otherwise fall back to
        // the map's default and still receive the skipped updates later.
        for (TObjectDoubleIterator<byte[]> it = vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            iterations.putPrimitive(it.key(), (double)iter);
        }
    }

    /**
     * disregards prior value at a particular key, replacing with the specified
     * value.
     */
    public double setCoordinate(String key, double value) {
        if (Utilities.floatingPointEquals(value, 0d)) {
            return deleteCoordinate(key);
        } else if (!freezeKeySet) {
            vector.putPrimitive(key, value);
            iterations.putPrimitive(key, (double)iteration);
        }
        return 0d;
    }

    /**
     * remove a coordinate from the vector (same as setting it to 0).
     */
    public double deleteCoordinate(String key) {
        if (vector.containsKey(key) && !freezeKeySet) {
            iterations.removePrimitive(key);
            return vector.removePrimitive(key);
        } else {
            return 0d;
        }
    }

    /**
     * Returns the underlying map after applying all pending lazy updates, so
     * callers see the same values as keySet(), values() and iterator().
     */
    public Map<String, Double> getMap() {
        delazify();
        return vector;
    }

    protected double addToCoordinateInternal(byte[] bkey, double value) {
        // bring the coordinate up to date before adding to it.
        delazifyCoordinate(bkey);
        if (vector.containsKey(bkey)) {
            double updated = vector.getPrimitive(bkey) + value;
            if (Utilities.floatingPointEquals(updated, 0.0d)) {
                iterations.removePrimitive(bkey);
                return vector.removePrimitive(bkey);
            } else {
                iterations.putPrimitive(bkey, (double)iteration);
                return vector.putPrimitive(bkey, updated);
            }
        } else if (!freezeKeySet && !Utilities.floatingPointEquals(value, 0.0d)) {
            vector.putPrimitive(bkey, value);
            iterations.putPrimitive(bkey, (double)iteration);
        }
        return 0d;
    }

    /**
     * return the value of a coordinate.
     */
    public double getCoordinate(String key) {
        delazifyCoordinate(key);
        return vector.getPrimitive(key);
    }

    /**
     * the dimension of the vector.
     */
    public int size() {
        delazify();
        return vector.size();
    }

    /**
     * whether this vector has a non-zero value for a coordinate.
     */
    public boolean containsKey(String key) {
        delazify();
        return vector.containsKey(key);
    }

    /**
     * whether this vector has a non-zero value for a coordinate.
     */
    public boolean contains(String key) {
        return containsKey(key);
    }

    /**
     * the set of non-zero coordinate names.
     */
    public Set<String> keySet() {
        delazify();
        return vector.keySet();
    }

    /**
     * the set of values in the map.
     */
    public Set<Double> values() {
        delazify();
        return vector.values();
    }

    /**
     * Apply an arbitrary scalar function to the values.
     */
    public void transformValues(TDoubleFunction func) {
        delazify();
        vector.transformValues(func);
    }

    /**
     * Remove zeros that may have appeared as a result of a transform
     */
    public void removeZeroCoordinates() {
        for (TObjectDoubleIterator<byte[]> it = vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            if (Utilities.floatingPointEquals(it.value(), 0d)) {
                iterations.removePrimitive(it.key());
                it.remove();
            }
        }
    }

    /**
     * compute the inner product between this and vec.
     */
    public double dot(StringKeyedVector skv) {
        if (skv instanceof LazyVector) {
            return dotWithLazy((LazyVector)skv);
        } else {
            return dotWithSKV(skv);
        }
    }

    protected double dotWithSKV(StringKeyedVector vec) {
        // dont figure out which ones bigger etc, since delazifying this to get
        // the size is too slow.
        double res = 0.0;
        for (TObjectDoubleIterator<byte[]> it = vec.vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            res += it.value() * delazifyCoordinate(it.key());
        }
        return res;
    }

    protected double dotWithLazy(LazyVector vec) {
        // size() delazifies the whole vector, so evaluate it once per vector
        // instead of once per comparison.
        boolean thisIsBigger = this.size() > vec.size();
        ByteArrayDoubleHashMap vec_small = thisIsBigger ? vec.vector
                : this.vector;
        ByteArrayDoubleHashMap vec_big = thisIsBigger ? this.vector
                : vec.vector;
        // collect the shared keys first to prevent modification during
        // iteration.
        ArrayList<byte[]> commonCoordinates = new ArrayList<byte[]>();
        double res = 0.0;
        for (TObjectDoubleIterator<byte[]> it = vec_small.troveIterator(); it
                .hasNext();) {
            it.advance();
            if (vec_big.containsKey(it.key())) {
                commonCoordinates.add(it.key());
            }
        }
        for (byte[] key : commonCoordinates) {
            delazifyCoordinate(key);
            vec.delazifyCoordinate(key);
            res += vec_small.getPrimitive(key) * vec_big.getPrimitive(key);
        }
        return res;
    }

    /**
     * compute the LP norm for given p < infinity.
     */
    public double LPNorm(double p) {
        delazify();
        return super.LPNorm(p);
    }

    /**
     * immutable access the underlying hash map.
     */
    public Iterator<Map.Entry<String, Double>> iterator() {
        delazify();
        return vector.iterator();
    }

    public String toString() {
        delazify();
        return super.toString();
    }

    // make sure all pending updates are applied before java serialization.
    private Object writeReplace() throws java.io.ObjectStreamException {
        delazify();
        return this;
    }

    // - java serialization
    private void writeObject(ObjectOutputStream output) throws IOException {
        output.writeLong(iteration);
        output.writeObject(vector);
        output.writeObject(updater);
        output.writeBoolean(freezeKeySet);
    }

    private void readObject(ObjectInputStream input) throws IOException,
            ClassNotFoundException {
        iteration = input.readLong();
        vector = (ByteArrayDoubleHashMap)input.readObject();
        updater = (UpdateFunction)input.readObject();
        freezeKeySet = input.readBoolean();
        // set up iteration info: every coordinate defaults to the restored
        // iteration. Use FEATURE_ENCODING like the constructors do, so
        // String-keyed accesses encode keys the same way as before
        // serialization.
        iterations = new ByteArrayDoubleHashMap(10, LOAD_FACTOR,
                FEATURE_ENCODING, (double)iteration);
    }

    // - kryo serialization for use in scalding.
    public void write(Kryo kryo, Output output) {
        delazify();
        output.writeLong(iteration);
        kryo.writeObject(output, vector);
        kryo.writeClassAndObject(output, updater);
        output.writeBoolean(freezeKeySet);
    }

    public void read(Kryo kryo, Input input) {
        iteration = input.readLong();
        vector = kryo.readObject(input, ByteArrayDoubleHashMap.class);
        updater = (UpdateFunction)kryo.readClassAndObject(input);
        freezeKeySet = input.readBoolean();
        // set up iteration info: every coordinate defaults to the restored
        // iteration, with the same key encoding the constructors use.
        iterations = new ByteArrayDoubleHashMap(10, LOAD_FACTOR,
                FEATURE_ENCODING, (double)iteration);
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/MulticlassLabel.java
================================================
package com.etsy.conjecture.data;

/**
 * A hard multiclass label: the instance is taken to belong, with 100%
 * probability, to the single class named by {@link #getLabel()}.
 */
public class MulticlassLabel extends Label {

    private static final long serialVersionUID = 1L;

    /** Name of the class; null when no class has been set. */
    protected String label;

    /** Creates a label with no class name (null). */
    public MulticlassLabel() {
        this(null);
    }

    /**
     * @param label
     *            name of the class this label stands for
     */
    public MulticlassLabel(String label) {
        this.label = label;
    }

    /** @return the class name, possibly null */
    public String getLabel() {
        return this.label;
    }

    /** Replaces the class name. */
    public void setLabel(String label) {
        this.label = label;
    }

    /** @return the class name itself (null if unset) */
    public String toString() {
        return label;
    }

    /**
     * Reduces this label to a binary one with respect to a single class.
     *
     * @param className
     *            the class treated as positive; must not be null
     * @return a BinaryLabel built from 1.0 when this label names
     *         {@code className}, from 0.0 otherwise
     */
    public BinaryLabel toBinaryLabel(String className) {
        final boolean isTargetClass = className.equals(label);
        return new BinaryLabel(isTargetClass ? 1.0 : 0.0);
    }

    /** Hash derived from the class name only; a null name hashes as 0. */
    @Override
    public int hashCode() {
        final int labelHash = (label == null) ? 0 : label.hashCode();
        // identical to the conventional 31 * 1 + labelHash
        return 31 + labelHash;
    }

    /**
     * Two labels are equal when they are of exactly the same runtime class and
     * carry equal (or both null) class names.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final MulticlassLabel that = (MulticlassLabel)obj;
        return (label == null) ? that.label == null : label
                .equals(that.label);
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/MulticlassLabeledInstance.java
================================================
package com.etsy.conjecture.data;

import java.util.Map;

public class MulticlassLabeledInstance extends
        AbstractInstance<MulticlassLabeledInstance> implements
        LabeledInstance<MulticlassLabel> {

    protected MulticlassLabel label;

    public MulticlassLabel getLabel() {
        return label;
    }

    public MulticlassLabeledInstance(String label) {
        this(new MulticlassLabel(label), 1.0);
    }

    public MulticlassLabeledInstance(String label, double weight) {
        this(new MulticlassLabel(label), weight);
    }

    public MulticlassLabeledInstance(String label, Map<String, Double> instance) {
        this(new MulticlassLabel(label), instance, 1.0);
    }

    public MulticlassLabeledInstance(String label,
            Map<String, Double> instance, double weight) {
        this(new MulticlassLabel(label), instance, weight);
    }

    public MulticlassLabeledInstance(String label, StringKeyedVector vec) {
        this(new MulticlassLabel(label), vec.getMap(), 1.0);
    }

    public MulticlassLabeledInstance(String label, StringKeyedVector vec,
            double weight) {
        this(new MulticlassLabel(label), vec.getMap(), weight);
    }

    public MulticlassLabeledInstance(MulticlassLabel label) {
        this(label, 1.0);
    }

    public MulticlassLabeledInstance(MulticlassLabel label, double weight) {
        super(weight);
        this.label = label;
    }

    public MulticlassLabeledInstance(MulticlassLabel label,
            Map<String, Double> instance) {
        this(label, instance, 1.0);
    }

    public MulticlassLabeledInstance(MulticlassLabel label,
            Map<String, Double> instance, double weight) {
        super(instance, weight);
        this.label = label;
    }

    public MulticlassLabeledInstance(MulticlassLabel label,
            StringKeyedVector vec) {
        this(label, vec.getMap(), 1.0);
    }

    public MulticlassLabeledInstance(MulticlassLabel label,
            StringKeyedVector vec, double weight) {
        this(label, vec.getMap(), weight);
    }

    public BinaryLabeledInstance toBinaryInstance(String category) {
        double tmpLabel = 0d;
        if (category.equals(this.label.getLabel())) {
            tmpLabel = 1d;
        }
        return new BinaryLabeledInstance(tmpLabel, getVector());
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/MulticlassPrediction.java
================================================
package com.etsy.conjecture.data;

import java.util.Map;
import com.google.common.collect.Maps;

/**
 * A soft multiclass prediction: a probability of membership for each class.
 * The inherited hard label is set to the most probable class.
 */
public class MulticlassPrediction extends MulticlassLabel {

    private static final long serialVersionUID = -1L;

    /**
     * class membership probabilities
     */
    private Map<String, Double> classProbs;

    /**
     * Copies the given probabilities into a new hash map and sets the label
     * to the class with the largest probability. On ties the class met first
     * in the input map's iteration order wins; an empty map leaves the label
     * null.
     *
     * @param classProbs
     *            class name to probability; must not be null or contain null
     *            values
     */
    public MulticlassPrediction(Map<String, Double> classProbs) {
        this.classProbs = Maps.newHashMap(classProbs);
        String bestCategory = null;
        double bestProb = 0;
        boolean seenAny = false;
        for (Map.Entry<String, Double> entry : classProbs.entrySet()) {
            final double prob = entry.getValue();
            if (!seenAny || prob > bestProb) {
                bestProb = prob;
                bestCategory = entry.getKey();
                seenAny = true;
            }
        }
        setLabel(bestCategory);
    }

    /**
     * @return the probability stored for {@code category}, or null when the
     *         class is unknown
     */
    public Double getProb(String category) {
        return classProbs.get(category);
    }

    /**
     * @return the probability stored for {@code category} if the class has an
     *         entry, otherwise {@code def}
     */
    public Double getProbOrElse(String category, Double def) {
        return classProbs.containsKey(category) ? classProbs.get(category)
                : def;
    }

    /**
     * @return the internal probability map itself (not a copy)
     */
    public Map<String, Double> getMap() {
        return classProbs;
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/data/RealValueLabeledInstance.java
================================================
package com.etsy.conjecture.data;

import java.util.Map;

/**
 * An instance (feature vector plus weight, both handled by AbstractInstance)
 * paired with a {@link RealValuedLabel}. Every constructor ends in one of two
 * forms: (label, weight) or (label, feature map, weight). Where no weight is
 * given, 1.0 is used.
 */
public class RealValueLabeledInstance extends
        AbstractInstance<RealValueLabeledInstance> implements
        LabeledInstance<RealValuedLabel> {

    private final RealValuedLabel label;

    /** @return the label attached to this instance */
    public RealValuedLabel getLabel() {
        return label;
    }

    /** Label value 0.0, weight 1.0, no features passed to the superclass. */
    public RealValueLabeledInstance() {
        this(new RealValuedLabel(0.0), 1.0);
    }

    // - constructors taking the label as a primitive double.

    public RealValueLabeledInstance(double label) {
        this(new RealValuedLabel(label), 1.0);
    }

    public RealValueLabeledInstance(double label, double weight) {
        this(new RealValuedLabel(label), weight);
    }

    public RealValueLabeledInstance(double label, Map<String, Double> instance) {
        this(new RealValuedLabel(label), instance, 1.0);
    }

    public RealValueLabeledInstance(double label, Map<String, Double> instance,
            double weight) {
        this(new RealValuedLabel(label), instance, weight);
    }

    public RealValueLabeledInstance(double label, StringKeyedVector vec) {
        this(new RealValuedLabel(label), vec.getMap(), 1.0);
    }

    public RealValueLabeledInstance(double label, StringKeyedVector vec,
            double weight) {
        this(new RealValuedLabel(label), vec.getMap(), weight);
    }

    // - constructors taking a ready-made label object.

    public RealValueLabeledInstance(RealValuedLabel label) {
        this(label, 1.0);
    }

    /** Label and weight only; no features are passed to the superclass. */
    public RealValueLabeledInstance(RealValuedLabel label, double weight) {
        super(weight);
        this.label = label;
    }

    public RealValueLabeledInstance(RealValuedLabel label,
            Map<String, Double> instance) {
        this(label, instance, 1.0);
    }

    /** Label, feature map and weight. */
    public RealValueLabeledInstance(RealValuedLabel label,
            Map<String, Double> instance, double weight) {
        super(instance, weight);
        this.label = label;
    }

    public RealValueLabeledInstance(RealValuedLabel label, StringKeyedVector vec) {
        this(label, vec.getMap(), 1.0);
    }

    /** The vector contributes its map view ({@code vec.getMap()}). */
    public RealValueLabeledInstance(RealValuedLabel label,
            StringKeyedVector vec, double weight) {
        this(label, vec.getMap(), weight);
    }

}


================================================
FILE: src/main/java/com/etsy/conjecture/data/RealValuedLabel.java
================================================
package com.etsy.conjecture.data;

/**
 * A label carrying a single real (double) value. The value is fixed at
 * construction time.
 */
public class RealValuedLabel extends Label {

    protected final Double value;
    private static final long serialVersionUID = -1L;

    /**
     * @param value
     *            the label value
     */
    public RealValuedLabel(double value) {
        // Double.valueOf replaces the deprecated Double(double) constructor
        this.value = Double.valueOf(value);
    }

    /** @return the label value (never null) */
    public Double getValue() {
        return this.value;
    }

    /** @return the decimal string form of the value */
    @Override
    public String toString() {
        return String.valueOf(value);
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/data/Recommendation.java
================================================
package com.etsy.conjecture.data;

import java.io.Serializable;

/**
 * Immutable (id, score) pair.
 */
public class Recommendation implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Score attached to {@link #id}. */
    public final double score;

    /** Identifier this score belongs to. */
    public final String id;

    /**
     * @param id
     *            identifier being scored
     * @param score
     *            score for that identifier
     */
    public Recommendation(String id, double score) {
        this.score = score;
        this.id = id;
    }

}

================================================
FILE: src/main/java/com/etsy/conjecture/data/StringKeyedVector.java
================================================
package com.etsy.conjecture.data;

import gnu.trove.function.TDoubleFunction;
import gnu.trove.iterator.TObjectDoubleIterator;

import java.io.Serializable;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import com.etsy.conjecture.Utilities;
import com.google.gson.Gson;

/**
 * Sparse vector of doubles whose coordinates are named by strings (feature
 * names). Values are held in a {@link ByteArrayDoubleHashMap}. Most mutators
 * drop a coordinate rather than store a value that
 * {@code Utilities.floatingPointEquals} treats as zero, so a missing key
 * stands for zero.
 */
public class StringKeyedVector implements Serializable,
        Iterable<Map.Entry<String, Double>> {

    private static final long serialVersionUID = -7070522686694887436L;

    // - represent the sparse vector by a mapping of coordinate name strings
    // (feature names)
    // to doubles.
    protected ByteArrayDoubleHashMap vector;

    // - whether to permit the addition of more features to this vector.
    protected boolean freezeKeySet = false;

    // - the load factor for the underlying hashmap.
    public static final float LOAD_FACTOR = 0.9f;

    public static final String FEATURE_ENCODING = "ASCII";

    /** Creates an empty vector with an initial capacity of 10. */
    public StringKeyedVector() {
        this(10);
    }

    /**
     * Creates an empty vector.
     *
     * @param initialCapacity
     *            initial capacity handed to the underlying hash map
     */
    public StringKeyedVector(int initialCapacity) {
        vector = new ByteArrayDoubleHashMap(initialCapacity, LOAD_FACTOR,
                FEATURE_ENCODING, 0.0);
    }

    /**
     * Creates a vector holding the same values as {@code skv}, built by adding
     * {@code skv} to an empty vector. The freezeKeySet flag is not copied.
     */
    public StringKeyedVector(StringKeyedVector skv) {
        this(skv.size());
        add(skv);
    }

    /**
     * Creates a vector from a plain map; every entry of {@code jmap} is put
     * into the underlying map as is.
     */
    public StringKeyedVector(Map<String, Double> jmap) {
        vector = new ByteArrayDoubleHashMap(jmap.size(), LOAD_FACTOR,
                FEATURE_ENCODING, 0.0);
        vector.putAll(jmap);
    }

    /**
     * returns whether the key set is frozen (true means that further dimensions
     * cannot be added to this vector).
     */
    public boolean getFreezeKeySet() {
        return freezeKeySet;
    }

    /**
     * sets whether the key set is frozen (true means that further dimensions
     * cannot be added to this vector).
     */
    public void setFreezeKeySet(boolean freeze) {
        freezeKeySet = freeze;
    }

    /**
     * disregards prior value at a particular key, replacing with the specified
     * value. A (floating-point) zero value deletes the coordinate instead, via
     * {@link #deleteCoordinate(String)}. While the key set is frozen a
     * non-zero value is not written at all, even for a key that already
     * exists.
     *
     * @return the result of {@link #deleteCoordinate(String)} when
     *         {@code value} is zero; 0 in every other case
     */
    public double setCoordinate(String key, double value) {
        if (Utilities.floatingPointEquals(value, 0d)) {
            return deleteCoordinate(key);
        } else if (!freezeKeySet) {
            vector.putPrimitive(key, value);
        }
        return 0d;
    }

    /**
     * remove a coordinate from the vector (same as setting it to 0). Nothing
     * is removed while the key set is frozen.
     *
     * @return what the underlying map reports for the removal, or 0 when the
     *         key is absent or the key set is frozen
     */
    public double deleteCoordinate(String key) {
        if (vector.containsKey(key) && !freezeKeySet) {
            return vector.removePrimitive(key);
        } else {
            return 0d;
        }
    }

    /**
     * @return the underlying map itself (not a copy)
     */
    public Map<String, Double> getMap() {
        return vector;
    }

    /**
     * add to a specified coordinate (treating it as 0 if it was not present).
     */
    public double addToCoordinate(String key, double value) {
        byte[] bkey = vector.stringToByteArray(key);
        return addToCoordinateInternal(bkey, value);
    }

    /**
     * Adds {@code value} to the coordinate with the given encoded key.
     * <ul>
     * <li>Key present: the sum replaces the old value, or the key is removed
     * when the sum is (floating-point) zero. This happens regardless of the
     * freezeKeySet flag.</li>
     * <li>Key absent: the value is stored only if the key set is not frozen
     * and the value is non-zero.</li>
     * </ul>
     *
     * @return for a present key, whatever the underlying put/remove call
     *         returns; 0 otherwise
     */
    protected double addToCoordinateInternal(byte[] bkey, double value) {
        if (vector.containsKey(bkey)) {
            double updated = vector.getPrimitive(bkey) + value;
            if (Utilities.floatingPointEquals(updated, 0.0d)) {
                return vector.removePrimitive(bkey);
            } else {
                return vector.putPrimitive(bkey, updated);
            }
        } else if (!freezeKeySet && !Utilities.floatingPointEquals(value, 0.0d)) {
            vector.putPrimitive(bkey, value);
        }
        return 0d;
    }

    /**
     * return the value of a coordinate.
     */
    public double getCoordinate(String key) {
        return vector.getPrimitive(key);
    }

    /**
     * add a multiple of vec to this. A LazyVector argument is delazified
     * first.
     */
    public void addScaled(StringKeyedVector vec, double scale) {
        if (vec instanceof LazyVector) {
            ((LazyVector)vec).delazify();
        }
        for (TObjectDoubleIterator<byte[]> it = vec.vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            addToCoordinateInternal(it.key(), scale * it.value());
        }
    }

    /**
     * Coordinate-wise product of this vector and {@code vec}, returned as a
     * new vector; neither operand is modified (apart from delazifying a
     * LazyVector argument). Coordinates of {@code vec} that this vector does
     * not contain are skipped, so the result holds no explicit entry for
     * them.
     */
    public StringKeyedVector multiplyPointwise(StringKeyedVector vec) {
        StringKeyedVector res = new StringKeyedVector();
        if (vec instanceof LazyVector) {
            ((LazyVector)vec).delazify();
        }
        for (TObjectDoubleIterator<byte[]> it = vec.vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            byte[] key = it.key();
            // a key missing from this vector is an implicit zero: do not
            // materialize it as a stored 0.0 entry in the result
            if (vector.containsKey(key)) {
                res.vector.putPrimitive(key, vector.getPrimitive(key)
                        * it.value());
            }
        }
        return res;
    }

    /**
     * Returns a new vector holding this vector's values at the coordinates
     * stored in {@code vec}; values that are (floating-point) zero are left
     * out. A LazyVector argument is delazified first.
     */
    public StringKeyedVector projectOntoNonZeroCoordinates(StringKeyedVector vec) {
        StringKeyedVector res = new StringKeyedVector();
        if (vec instanceof LazyVector) {
            ((LazyVector)vec).delazify();
        }
        for (TObjectDoubleIterator<byte[]> it = vec.vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            res.addToCoordinateInternal(it.key(), vector.getPrimitive(it.key()));
        }
        return res;
    }

    /**
     * the dimension of the vector (number of stored coordinates).
     */
    public int size() {
        return vector.size();
    }

    /**
     * whether this vector has a non-zero value for a coordinate.
     */
    public boolean containsKey(String key) {
        return vector.containsKey(key);
    }

    /**
     * whether this vector has a non-zero value for a coordinate.
     */
    public boolean contains(String key) {
        return containsKey(key);
    }

    /**
     * the set of non-zero coordinate names.
     */
    public Set<String> keySet() {
        return vector.keySet();
    }

    /**
     * the set of values in the map.
     */
    public Set<Double> values() {
        return vector.values();
    }

    /**
     * add vec to this
     */
    public void add(StringKeyedVector vec) {
        addScaled(vec, 1.0);
    }

    /**
     * subtract vec from this.
     */
    public void sub(StringKeyedVector vec) {
        addScaled(vec, -1.0);
    }

    /**
     * multiply this vector by a scalar.
     */
    public void mul(final double a) {
        transformValues(new TDoubleFunction() {
            public double execute(double b) {
                return a * b;
            }
        });
    }

    /**
     * Apply an arbitrary scalar function to the values.
     */
    public void transformValues(TDoubleFunction func) {
        vector.transformValues(func);
    }

    /**
     * Remove zeros that may have appeared as a result of a transform
     */
    public void removeZeroCoordinates() {
        for (TObjectDoubleIterator<byte[]> it = vector.troveIterator(); it
                .hasNext();) {
            it.advance();
            if (Utilities.floatingPointEquals(it.value(), 0d)) {
                it.remove();
            }
        }
    }

    /**
     * compute the inner product between this and vec. A LazyVector argument
     * handles the product itself ({@code vec.dot(this)}); otherwise the
     * smaller of the two maps is iterated and looked up in the larger one.
     */
    public double dot(StringKeyedVector vec) {
        if (vec instanceof LazyVector) {
            return vec.dot(this);
        }
        ByteArrayDoubleHashMap vec_small = this.size() > vec.size() ? vec.vector
                : this.vector;
        ByteArrayDoubleHashMap vec_big = this.size() > vec.size() ? this.vector
                : vec.vector;
        double res = 0.0;
        for (TObjectDoubleIterator<byte[]> it = vec_small.troveIterator(); it
                .hasNext();) {
            it.advance();
            if (vec_big.containsKey(it.key())) {
                res += it.value() * vec_big.getPrimitive(it.key());
            }
        }
        return res;
    }

    /**
     * compute the LP norm for given p < infinity.
     */
    public double LPNorm(double p) {
        double tot = 0d;
        for (double v : vector.values()) {
            tot += Math.pow(Math.abs(v), p);
        }
        return Math.pow(tot, 1d / p);
    }

    /**
     * Find the max value. The running maximum starts at 0.0, so the result is
     * never negative: a vector with no positive stored value yields 0.0.
     */
    public double max() {
        double max = 0.0;
        for (double v : vector.values()) {
            if (v > max) {
                max = v;
            }
        }
        return max;
    }

    /**
     * Iterates over the entries of the underlying hash map.
     */
    public Iterator<Map.Entry<String, Double>> iterator() {
        return vector.iterator();
    }

    /** @return the underlying map rendered as JSON by Gson */
    public String toString() {
        Gson gson = new Gson();
        return gson.toJson(vector);
    }

    /**
     * performs a deep copy of a stringkeyedvector. Entries are transferred
     * with {@link #setCoordinate(String, double)}, so values that are
     * (floating-point) zero are not carried over. The copy always starts
     * unfrozen; the freezeKeySet flag is not copied.
     */
    public StringKeyedVector copy() {
        StringKeyedVector out = new StringKeyedVector(this.size());
        Iterator<Map.Entry<String, Double>> it = this.iterator();

        while (it.hasNext()) {
            Map.Entry<String, Double> entry = it.next();
            String key = entry.getKey();
            Double value = entry.getValue();

            out.setCoordinate(key, value);
        }

        return out;
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/evaluation/BinaryModelEvaluation.java
================================================
package com.etsy.conjecture.evaluation;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import com.etsy.conjecture.data.BinaryLabel;
import com.etsy.conjecture.PrimitivePair;

/**
 * a basic container for evaluations of a binary classifier: every observation
 * is fed to both a ReceiverOperatingCharacteristic and a two-class
 * ConfusionMatrix, and the metrics are read back from those two.
 * TODO: add getters for individual metrics
 */
public class BinaryModelEvaluation implements ModelEvaluation<BinaryLabel>,
        Serializable {

    private static final long serialVersionUID = 1L;
    private final ReceiverOperatingCharacteristic ROC;
    private final ConfusionMatrix conf;

    /** Starts with no observations and a 2-class confusion matrix. */
    public BinaryModelEvaluation() {
        this.ROC = new ReceiverOperatingCharacteristic();
        this.conf = new ConfusionMatrix(2);
    }

    /**
     * Folds another evaluation into this one.
     *
     * @param other
     *            must be a BinaryModelEvaluation (it is cast without a check)
     */
    public void merge(ModelEvaluation<BinaryLabel> other) {
        final BinaryModelEvaluation that = (BinaryModelEvaluation)other;
        ROC.add(that.ROC);
        conf.add(that.conf);
    }

    /** Records one (actual, predicted) pair given as labels. */
    public void add(BinaryLabel real, BinaryLabel pred) {
        final double actual = real.getValue();
        final double predicted = pred.getValue();
        add(actual, predicted);
    }

    /**
     * Records one observation.
     *
     * @param label
     *            the actual label; truncated to int to pick the confusion
     *            matrix row
     * @param prediction
     *            the predicted value, passed unchanged to both structures
     */
    public void add(double label, double prediction) {
        ROC.add(label, prediction);
        final int actualClass = (int)label;
        conf.addHard(actualClass, prediction);
    }

    /** Records one observation given as a (label, prediction) pair. */
    public void add(PrimitivePair labelPrediction) {
        ROC.add(labelPrediction);
        final int actualClass = (int)labelPrediction.first;
        conf.addHard(actualClass, labelPrediction.second);
    }

    // - metrics read from the ROC structure.

    public double computeAUC() {
        return ROC.binaryAUC();
    }

    public double computeBrier() {
        return ROC.brierScore();
    }

    // - metrics read from the confusion matrix; the no-arg variants use the
    // matrix's overall/average methods, the int variants address one class.

    public double computeAccy() {
        return conf.computeAccuracy();
    }

    public double computeAccy(int dim) {
        return conf.computeAccuracy(dim);
    }

    public double computeFmeasure() {
        return conf.computeAverageFmeasure();
    }

    public double computeFmeasure(int dim) {
        return conf.computeFmeasure(dim);
    }

    public double computePrecision() {
        return conf.computeAveragePrecision();
    }

    public double computePrecision(int dim) {
        return conf.computePrecision(dim);
    }

    public double computeRecall() {
        return conf.computeAverageRecall();
    }

    public double computeRecall(int dim) {
        return conf.computeRecall(dim);
    }

    /**
     * @return all metrics keyed by display name, in a map sorted by key
     */
    public Map<String, Double> getStatistics() {
        final Map<String, Double> stats = new TreeMap<String, Double>();

        stats.put("Brier", computeBrier());
        stats.put("Acc (avg)", computeAccy());
        stats.put("F1 (avg)", computeFmeasure());
        stats.put("Prc (avg)", computePrecision());
        stats.put("Rec (avg)", computeRecall());

        // per-class figures for class 0, then class 1
        for (int cls = 0; cls <= 1; cls++) {
            final String prefix = cls + "-class ";
            stats.put(prefix + "Acc", computeAccy(cls));
            stats.put(prefix + "F1", computeFmeasure(cls));
            stats.put(prefix + "Prc", computePrecision(cls));
            stats.put(prefix + "Rec", computeRecall(cls));
        }
        stats.put("1-class AUC", computeAUC());
        return stats;
    }

    /**
     * @return non-numeric artifacts; currently just the confusion matrix
     *         rendered as text under the key "conf"
     */
    public Map<String, Object> getObjects() {
        final Map<String, Object> artifacts = new HashMap<String, Object>();
        artifacts.put("conf", conf.toString());
        return artifacts;
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/evaluation/ConfusionMatrix.java
================================================
package com.etsy.conjecture.evaluation;

import java.io.Serializable;
import java.util.Collection;

import com.etsy.conjecture.PrimitivePair;
import static com.google.common.base.Preconditions.checkArgument;

/**
 * Confusion matrix for recording misclassification errors.
 *
 * @see <a href="http://en.wikipedia.org/wiki/Confusion_matrix">Confusion Matrix</a>
 * @author jattenberg
 */
public class ConfusionMatrix implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * The data structure representing the confusion matrix. rows correspond to
     * labels, columns to predictions
     */
    private double[][] confMatrix;

    /** The num_classes represented in the confusion matrix */
    private final int numClasses;

    /** The number of label / prediction pairs observed */
    double obs;

    /**
     * Creates an empty confusion matrix: a classes x classes grid of zeros
     * and an observation count of zero.
     * 
     * @param classes
     *            the number of target classes in the problem being considered
     */
    public ConfusionMatrix(int classes) {
        this.numClasses = classes;
        this.confMatrix = new double[classes][classes];
        this.obs = 0;
    }

    /**
     * Adds another confusion matrix to this one, cell by cell, and adds its
     * observation count.
     * 
     * @param m
     *            the matrix to merge in; must have the same number of classes
     * @throws IllegalArgumentException
     *             if the class counts differ (checked before anything is
     *             modified, so this matrix is left untouched)
     */
    public void add(ConfusionMatrix m) {
        checkArgument(
                m.numClasses == numClasses,
                "cannot merge confusion matrices with different num classes (%s vs %s)",
                numClasses, m.numClasses);
        obs += m.obs;
        for (int i = 0; i < numClasses; i++) {
            for (int j = 0; j < numClasses; j++) {
                confMatrix[i][j] += m.confMatrix[i][j];
            }
        }
    }

    /**
     * Creates a confusion matrix and records an initial batch of
     * observations; each pair's {@code first} and {@code second} are handed
     * to {@code addInfo}.
     * 
     * @param classes
     *            - the number of target classes in the problem being considered
     * @param labelsAndPredictions
     *            the (label, prediction) pairs to record
     */
    public ConfusionMatrix(int classes,
            Collection<PrimitivePair> labelsAndPredictions) {
        this(classes);
        for (PrimitivePair pair : labelsAndPredictions) {
            addInfo(pair.first, pair.second);
        }
    }

    /**
     * Instantiates a new confusion matrix and adds some initial data
     * 
     * @param classes
     *            - the number of target classes in the problem being considered
     * @param labelsAndPredictions
     *            the labels and predictions
     */
    public ConfusionMatrix(int classes, PrimitivePair[] labelsAndPredictions) {
        this(classes);
        for (PrimitivePair p : labelsAndPredictions)
            addInfo(p.first, p.second);
    }

    /**
     * Instantiates a new confusion matrix and adds some initial data
     * 
     * @param classes
     *            - the number of target classes in the problem being considered
     * @param labelsAndPredictions
     *            the labels and predictions
     */
    public ConfusionMatrix(int classes, double[] labels, double[] predictions) {
        this(classes);
        checkArgument(
                labels.length == predictions.length,
                "labels and predictions must be of the same length! (%s vs %s)",
                labels.length, predictions.length);
        for (int i = 0; i < labels.length; i++) {
            addInfo(labels[i], predictions[i]);
        }
    }

    /**
     * Records a single hard observation: one more count in the cell at row
     * {@code label}, column {@code guess}.
     * 
     * @param label
     *            the index of the actual class
     * @param guess
     *            the index of the predicted class
     */
    public void addInfo(int label, int guess) {
        obs += 1;
        confMatrix[label][guess] += 1;
    }

    /**
     * Records one observation whose prediction is a distribution over
     * classes; shorthand for the three-argument form with a frequency of 1.
     * 
     * @param label
     *            the index of the actual class
     * @param guess
     *            the predicted distribution over classes.
     */
    public void addInfo(int label, double[] guess) {
        final double once = 1;
        addInfo(label, guess, once);
    }

    /**
     * Adds a label / prediction pair to the confusion matrix with soft
     * predictions: row {@code label} grows by {@code freq * guess[i]} in each
     * column i, and the observation count grows by {@code freq}.
     * 
     * @param label
     *            the index of the actual class
     * @param guess
     *            the predicted distribution over classes; its length must
     *            equal the number of classes
     * @param freq
     *            the number of times to consider the input label / prediction
     *            pair
     * @throws IllegalArgumentException
     *             if {@code guess} does not have one entry per class
     */
    public void addInfo(int label, double[] guess, double freq) {
        // Guava's checkArgument only substitutes %s placeholders, so %d would
        // be left unfilled in the message
        checkArgument(
                guess.length == numClasses,
                "input length (%s) must match num classes in confusion matrix (%s)",
                guess.length, numClasses);
        obs += freq;
        for (int i = 0; i < numClasses; i++) {
            confMatrix[label][i] += freq * guess[i];
        }
    }

    /**
     * Adds a label / prediction pair to the confusion matrix with soft labels
     * note, only applicable for binary classification (2 class) problems.
     * Both arguments are expanded to two-class distributions
     * {@code {1 - p, p}} before being recorded.
     * 
     * @param label
     *            the actual probability of membership in the positive class
     * @param prediction
     *            the predicted probability of membership in the positive class
     * @throws IllegalArgumentException
     *             if this matrix does not have exactly 2 classes
     */
    public void addInfo(double label, double prediction) {
        // Guava's checkArgument only substitutes %s placeholders, so %d would
        // be left unfilled in the message
        checkArgument(
                2 == numClasses,
                "num classes in confusion matrix (%s) must be 2 for this method",
                numClasses);
        addInfo(new double[] { 1. - label, label }, new double[] {
                1. - prediction, prediction });
    }

    /**
     * Records one observation where both the actual label and the prediction
     * are distributions over classes: cell (i, j) grows by
     * {@code softlabels[i] * guess[j]}, and the observation count by one.
     * 
     * @param softlabels
     *            actual distribution of target class memberships
     * @param guess
     *            the predicted distribution of class memberships
     */
    public void addInfo(double[] softlabels, double[] guess) {
        obs += 1;
        for (int actual = 0; actual < numClasses; actual++) {
            final double[] row = confMatrix[actual];
            for (int predicted = 0; predicted < numClasses; predicted++) {
                row[predicted] += softlabels[actual] * guess[predicted];
            }
        }
    }

    /**
     * Records a weighted observation where both the actual label and the
     * prediction are distributions over classes: cell (i, j) grows by
     * {@code softlabels[i] * guess[j] * freq}, and the observation count by
     * {@code freq}.
     * 
     * @param softlabels
     *            actual distribution of target class memberships
     * @param guess
     *            the predicted distribution of class memberships
     * @param freq
     *            the number of times to consider this label / prediction pair
     */
    public void addInfo(double[] softlabels, double[] guess, double freq) {
        obs += freq;
        for (int actual = 0; actual < numClasses; actual++) {
            final double[] row = confMatrix[actual];
            for (int predicted = 0; predicted < numClasses; predicted++) {
                row[predicted] += softlabels[actual] * guess[predicted] * freq;
            }
        }
    }

    /**
     * Computes the actual (label-side) distribution over all classes by
     * evaluating {@link #classDistribution(int)} for every class index.
     * 
     * @return the double[] encoding probabilities in each class.
     */
    public double[] classDistribution() {
        final double[] shares = new double[numClasses];
        int cls = 0;
        while (cls < numClasses) {
            shares[cls] = classDistribution(cls);
            cls++;
        }
        return shares;
    }

    /**
     * Computes the actual probability of membership in a particular class:
     * the sum of that class's row divided by the sum of the whole matrix.
     * The result is NaN while the matrix holds only zeros (0 / 0).
     * 
     * @param num
     *            index of the class of interest
     * @return the probability of membership in the requested class
     */
    public double classDistribution(int num) {
        double rowTotal = 0;
        double grandTotal = 0;
        for (int row = 0; row < numClasses; row++) {
            final boolean wanted = (row == num);
            for (int col = 0; col < numClasses; col++) {
                final double cell = confMatrix[row][col];
                if (wanted) {
                    rowTotal += cell;
                }
                grandTotal += cell;
            }
        }
        return rowTotal / grandTotal;
    }

    /**
     * Records a weighted observation after hardening both distributions: each
     * is replaced by a one-hot indicator of its most likely class before
     * being passed to {@code addInfo}.
     * 
     * @param softlabels
     *            actual distribution of target class memberships
     * @param guess
     *            the predicted distribution of class memberships
     * @param weight
     *            the number of times to consider this label / prediction pair
     */
    public void addHard(double[] softlabels, double[] guess, double weight) {
        final double[] hardLabels = softToHard(softlabels);
        final double[] hardGuess = softToHard(guess);
        addInfo(hardLabels, hardGuess, weight);
    }

    /**
     * Records one observation after hardening both distributions: each is
     * replaced by a one-hot indicator of its most likely class before being
     * passed to {@code addInfo}.
     * 
     * @param softlabels
     *            actual distribution of target class memberships
     * @param guess
     *            the predicted distribution of class memberships
     */
    public void addHard(double[] softlabels, double[] guess) {
        final double[] hardLabels = softToHard(softlabels);
        final double[] hardGuess = softToHard(guess);
        addInfo(hardLabels, hardGuess);
    }

    /**
     * Records one observation with a known actual class and a predicted
     * distribution that is first hardened to a one-hot indicator of its most
     * likely class.
     * 
     * @param label
     *            the index of the actual class of membership
     * @param guess
     *            the predicted distribution over classes
     */
    public void addHard(int label, double[] guess) {
        final double[] hardGuess = softToHard(guess);
        addInfo(label, hardGuess);
    }

    /**
     * Records one observation for a binary (2 class) problem: the prediction
     * is expanded to {@code {1 - prediction, prediction}}, hardened to a
     * one-hot indicator of the more likely class, and recorded against the
     * actual class.
     * 
     * @param label
     *            the index of the actual class of membership
     * @param prediction
     *            the predicted probability of membership in the positive class
     */
    public void addHard(int label, double prediction) {
        final double[] twoClass = new double[] { 1.0 - prediction, prediction };
        final double[] hardGuess = softToHard(twoClass);
        addInfo(label, hardGuess);
    }

    /**
     * Adds a weighted label / prediction pair to the confusion matrix with a
     * hard (most likely class) prediction: the predicted distribution is
     * replaced by a one-hot indicator of its most likely class.
     * 
     * @param label
     *            the index of the actual class of membership
     * @param guess
     *            the predicted distribution over classes
     * @param freq
     *            the number of times this label / prediction pair should be
     *            considered.
     */
    public void addHard(int label, double[] guess, double freq) {
        // forward freq; without it every call would count exactly once
        addInfo(label, softToHard(guess), freq);
    }

    /**
     * converts a soft prediction of probability estimates into a categorical
     * indicator for the most likely class. On ties the lowest index wins.
     * 
     * @param scores
     *            probabilities of label class membership; must be non-empty
     * @return the categorical values, 0's for all target classes with a 1 for
     *         the most likely class
     */
    private static double[] softToHard(double[] scores) {
        int maxindex = 0;
        // start below every finite score so an all-negative input still
        // selects its true maximum instead of defaulting to index 0
        double max = Double.NEGATIVE_INFINITY;
        double[] out = new double[scores.length];
        for (int i = 0; i < scores.length; i++) {
            if (scores[i] > max) {
                maxindex = i;
                max = scores[i];
            }
        }
        out[maxindex] = 1;
        return out;
    }

    /*
     * (non-Javadoc)
     * 
     * Renders the raw matrix as a tab separated table: a header listing the
     * predicted class indices, then one line per actual class.
     * 
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        final StringBuilder table = new StringBuilder("predicted:\t");
        final int lastClass = numClasses - 1;
        // header row: every class index, the last one terminated by a newline
        for (int col = 0; col < lastClass; col++) {
            table.append(col).append("\t");
        }
        table.append(lastClass).append("\n");
        // body: one row per actual class
        for (int row = 0; row < numClasses; row++) {
            table.append("actually ").append(row).append(":\t");
            for (int col = 0; col < numClasses; col++) {
                final double cell = confMatrix[row][col];
                table.append(String.format("%.4f\t", cell));
            }
            table.append("\n");
        }
        return table.toString();
    }

    /**
     * Renders the matrix as a tab separated table in which every cell has
     * been divided by the sum of its row.
     *
     * @return the string representation of the row normalized confusion
     *         matrix
     */
    public String toStringRowNormalized() {
        final StringBuilder table = new StringBuilder("predicted:\t");
        final int lastClass = numClasses - 1;
        for (int col = 0; col < lastClass; col++) {
            table.append(col).append("\t");
        }
        final double[] perRow = this.rowSums();
        table.append(lastClass).append("\n");
        for (int row = 0; row < numClasses; row++) {
            table.append("actually ").append(row).append(":\t");
            for (int col = 0; col < numClasses; col++) {
                final double share = confMatrix[row][col] / perRow[row];
                table.append(String.format("%.4f\t", share));
            }
            table.append("\n");
        }
        return table.toString();
    }

    /**
     * To string column normalized: every cell is divided by the sum of the
     * column it belongs to.
     * 
     * @return the string representation of the confusion matrix that has been
     *         column normalized
     */
    public String toStringColNormalized() {
        StringBuilder buff = new StringBuilder();
        buff.append("predicted:\t");
        for (int i = 0; i < numClasses - 1; i++) {
            buff.append(i + "\t");
        }
        double[] colSums = this.colSums();
        buff.append((numClasses - 1) + "\n");
        for (int i = 0; i < numClasses; i++) {
            buff.append("actually " + i + ":\t");
            for (int j = 0; j < numClasses; j++) {
                // normalise by the sum of column j (the cell's own column),
                // not by the sum belonging to the row index i
                String s = String.format("%.4f\t", confMatrix[i][j]
                        / colSums[j]);
                buff.append(s);
            }
            buff.append("\n");
        }
        return buff.toString();
    }

    /**
     * Sums the entries of every row of the confusion matrix.
     *
     * @return an array whose i-th entry is the sum of row i
     */
    public double[] rowSums() {
        final double[] totals = new double[numClasses];
        for (int row = 0; row < numClasses; row++) {
            double acc = 0;
            for (int col = 0; col < numClasses; col++) {
                acc += confMatrix[row][col];
            }
            totals[row] = acc;
        }
        return totals;
    }

    /**
     * Compute the accuracy for a given class: the cells that involve the
     * class neither as row nor as column (true negatives) plus the class's
     * diagonal cell (true positives), divided by the number of observations.
     *
     * @param classid
     *            the index of the class where accuracy has been requested
     * @return the fraction of correctly classified examples for the requested
     *         class
     */
    public double computeAccuracy(int classid) {
        double trueNegatives = 0.;
        for (int row = 0; row < numClasses; row++) {
            if (row == classid) {
                continue;
            }
            for (int col = 0; col < numClasses; col++) {
                if (col == classid) {
                    continue;
                }
                trueNegatives += confMatrix[row][col];
            }
        }
        final double truePositives = confMatrix[classid][classid];
        return (trueNegatives + truePositives) / obs;
    }

    /**
     * Weighted average of the per-class F-measures: each class contributes
     * its F-measure multiplied by its row sum, and the result is divided by
     * the sum of all row sums.
     *
     * @return the row-sum weighted average F-measure
     */
    public double computeAverageFmeasure() {
        final double[] weights = rowSums();
        final double weightTotal = total(weights);

        double weighted = 0.;
        for (int cls = 0; cls < numClasses; cls++) {
            final double score = computeFmeasure(cls);
            weighted += weights[cls] * score;
        }
        return weighted / weightTotal;
    }

    /**
     * Weighted average of the per-class precision values: each class
     * contributes its precision multiplied by its row sum, and the result is
     * divided by the sum of all row sums.
     *
     * @return the row-sum weighted average precision
     */
    public double computeAveragePrecision() {
        final double[] weights = rowSums();
        final double weightTotal = total(weights);

        double weighted = 0.;
        for (int cls = 0; cls < numClasses; cls++) {
            final double score = computePrecision(cls);
            weighted += weights[cls] * score;
        }
        return weighted / weightTotal;
    }

    /**
     * Weighted average of the per-class recall values: each class contributes
     * its recall multiplied by its row sum, and the result is divided by the
     * sum of all row sums.
     *
     * @return the row-sum weighted average recall
     */
    public double computeAverageRecall() {
        final double[] weights = rowSums();
        final double weightTotal = total(weights);

        double weighted = 0.;
        for (int cls = 0; cls < numClasses; cls++) {
            final double score = computeRecall(cls);
            weighted += weights[cls] * score;
        }
        return weighted / weightTotal;
    }

    /**
     * Sums the entries of every column of the confusion matrix.
     *
     * @return an array whose j-th entry is the sum of column j
     */
    public double[] colSums() {
        final double[] totals = new double[numClasses];
        for (int col = 0; col < numClasses; col++) {
            double acc = 0;
            // walk down the column, top row first
            for (int row = 0; row < numClasses; row++) {
                acc += confMatrix[row][col];
            }
            totals[col] = acc;
        }
        return totals;
    }

    /**
     * Returns a copy of the confusion matrix as a 2d array; the returned
     * array is freshly allocated, so changes to it do not affect this object.
     *
     * @return a numClasses x numClasses copy of the confusion matrix
     */
    public double[][] getMatrix() {
        final double[][] copy = new double[numClasses][numClasses];
        for (int row = 0; row < numClasses; row++) {
            System.arraycopy(confMatrix[row], 0, copy[row], 0, numClasses);
        }
        return copy;
    }

    /**
     * Reports the dimension of the confusion matrix.
     *
     * @return the number of classes considered
     */
    public int getDim() {
        return numClasses;
    }

    /**
     * Computes the accuracy over all observations for all classes: the sum of
     * the diagonal divided by the sum of every cell. Yields 0 when the matrix
     * total is not positive.
     *
     * @return accuracy over all classes.
     */
    public double computeAccuracy() {
        double correct = 0;
        double seen = 0;
        for (int cls = 0; cls < numClasses; cls++) {
            seen += total(confMatrix[cls]);
            correct += confMatrix[cls][cls];
        }
        return seen > 0 ? correct / seen : 0;
    }

    /**
     * Compute the precision for each class; the % of examples labeled as
     * belonging to each class that were actually members of that class.
     * <p>
     * Rows of the matrix are actual classes and columns are predicted classes
     * (see toString()), so precision for class c is the diagonal cell divided
     * by the sum of column c. This matches computePrecision(int). A class
     * that was never predicted gets a precision of 0.
     * 
     * @return an array containing the precision values for each class.
     */
    public double[] computePrecision() {
        double[] precision = new double[this.numClasses];
        for (int c = 0; c < this.numClasses; c++) {
            // everything predicted as class c, whatever its actual class
            double predictedAsC = 0;
            for (int actual = 0; actual < this.numClasses; actual++) {
                predictedAsC += confMatrix[actual][c];
            }
            if (predictedAsC != 0)
                precision[c] = confMatrix[c][c] / predictedAsC;
        }
        return precision;
    }

    /**
     * Compute the recall for each class; the % of members belonging to each
     * class that were labeled as class members.
     * <p>
     * Rows of the matrix are actual classes and columns are predicted classes
     * (see toString()), so recall for class c is the diagonal cell divided by
     * the sum of row c. This matches computeRecall(int). A class without any
     * actual members gets a recall of 0.
     * 
     * @return an array containing the recall values for each class.
     */
    public double[] computeRecall() {
        double[] recall = new double[this.numClasses];
        for (int c = 0; c < this.numClasses; c++) {
            // everything that actually belongs to class c, however predicted
            double actuallyC = 0;
            for (int predicted = 0; predicted < this.numClasses; predicted++) {
                actuallyC += confMatrix[c][predicted];
            }
            if (actuallyC != 0)
                recall[c] = confMatrix[c][c] / actuallyC;
        }
        return recall;
    }

    /**
     * Computes the F-measure for each class: the harmonic mean of that
     * class's precision and recall. See
     * <a href="http://en.wikipedia.org/wiki/F_measure">F-Measure</a> for more
     * info. Classes whose precision and recall sum to 0 keep an F-measure of
     * 0.
     *
     * @return the array containing the F-measure for each class
     */
    public double[] computeFmeasure() {
        final double[] result = new double[numClasses];
        final double[] p = this.computePrecision();
        final double[] r = this.computeRecall();

        for (int cls = 0; cls < numClasses; cls++) {
            final double denominator = p[cls] + r[cls];
            if (denominator == 0) {
                continue;
            }
            result[cls] = 2.0 * (p[cls] * r[cls]) / denominator;
        }
        return result;
    }

    /**
     * Builds a tab separated table with one header line followed by one line
     * per class listing its precision, recall and F measure.
     *
     * @return the string with performance stats
     */
    public String getIR() {
        final StringBuilder report = new StringBuilder();
        report.append("class\t").append("precision\t").append("recall\t")
                .append("F measure\n");
        final double[] p = this.computePrecision();
        final double[] r = this.computeRecall();
        final double[] f = this.computeFmeasure();

        for (int cls = 0; cls < numClasses; cls++) {
            report.append(cls).append("\t")
                    .append(p[cls]).append("\t")
                    .append(r[cls]).append("\t")
                    .append(f[cls]).append("\n");
        }
        return report.toString();
    }

    /**
     * Computes precision for a given class; the % of examples labeled as
     * belonging to the class that were actually members of it, i.e. the
     * diagonal cell divided by the sum of the class's column.
     * <p>
     * When nothing was predicted as this class the column sum is 0; in that
     * case 0 is returned (as the array-returning computePrecision() does)
     * rather than the NaN produced by dividing 0 by 0, which would otherwise
     * propagate into the weighted averages.
     * 
     * @param dim
     *            class of interest
     * @return the precision for the requested class, or 0 if the class was
     *         never predicted
     */
    public double computePrecision(int dim) {
        double tot = 0;
        for (int i = 0; i < numClasses; i++)
            tot += confMatrix[i][dim];
        if (tot == 0)
            return 0;
        return confMatrix[dim][dim] / tot;
    }

    /**
     * Compute the recall for a given class; the % of members belonging to the
     * class that were labeled as class members, i.e. the diagonal cell
     * divided by the sum of the class's row.
     * <p>
     * When the class has no actual members the row sum is 0; in that case 0
     * is returned (as the array-returning computeRecall() does) rather than
     * the NaN produced by dividing 0 by 0, which would otherwise propagate
     * into the weighted averages.
     * 
     * @param dim
     *            the class of interest
     * @return the recall for the requested class, or 0 if the class has no
     *         members
     */
    public double computeRecall(int dim) {
        double tot = 0;
        for (int i = 0; i < numClasses; i++)
            tot += confMatrix[dim][i];
        if (tot == 0)
            return 0;
        return confMatrix[dim][dim] / tot;
    }

    /**
     * Computes the F-measure for a given class; the harmonic mean of
     * precision and recall. See
     * <a href="http://en.wikipedia.org/wiki/F_measure">F-Measure</a> for more
     * info.
     * <p>
     * When precision and recall are both 0 (or undefined) the harmonic mean
     * is undefined; 0 is returned in that case, matching the array-returning
     * computeFmeasure(), instead of NaN.
     * 
     * @param dim
     *            the class of interest
     * @return the F-Measure of the requested class, or 0 when it is undefined
     */
    public double computeFmeasure(int dim) {
        double pre = computePrecision(dim);
        double rec = computeRecall(dim);
        double sum = pre + rec;
        // guard the 0/0 case, and a NaN coming from either component
        if (sum == 0 || Double.isNaN(sum))
            return 0;
        return 2 * (pre * rec) / sum;
    }

    /**
     * Adds up all entries of an array, front to back.
     *
     * @param arr
     *            the values to sum
     * @return the sum of the values; 0 for an empty array
     */
    private double total(double[] arr) {
        double sum = 0;
        for (double value : arr) {
            sum += value;
        }
        return sum;
    }

    /**
     * Builds a two class confusion matrix from the input observations and
     * computes the accuracy over all of them. Each pair is expanded into the
     * vectors {1 - first, first} and {1 - second, second} before being added
     * to the matrix.
     *
     * @param input
     *            the input label / prediction pairs
     * @return the accuracy of the input values
     */
    public static double computeAccuracy(Collection<PrimitivePair> input) {
        final ConfusionMatrix matrix = new ConfusionMatrix(2);
        for (PrimitivePair pair : input) {
            final double[] fromFirst = { 1. - pair.first, pair.first };
            final double[] fromSecond = { 1. - pair.second, pair.second };
            matrix.addInfo(fromFirst, fromSecond);
        }
        return matrix.computeAccuracy();
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/evaluation/EvaluationAggregator.java
================================================
package com.etsy.conjecture.evaluation;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

import com.etsy.conjecture.data.Label;

/**
 * Collects the results of several {@link ModelEvaluation}s (for example one
 * per cross-validation fold). Numeric statistics are accumulated per key in a
 * DescriptiveStatistics; arbitrary objects are accumulated per key in a list.
 */
public class EvaluationAggregator<L extends Label> implements Serializable {

    private static final long serialVersionUID = 5825037849957449364L;
    /** Per-statistic accumulators, kept sorted by statistic name. */
    protected Map<String, DescriptiveStatistics> stats = new TreeMap<String, DescriptiveStatistics>();
    /** Per-key lists of the objects reported by each added evaluation. */
    protected Map<String, List<Object>> obj = new HashMap<String, List<Object>>();

    /**
     * Adds the statistics and objects of one evaluation.
     * <p>
     * Accumulators and object lists are created on demand per key instead of
     * relying on "stats is empty" to detect the first evaluation. That way an
     * evaluation reporting objects but no statistics no longer causes the
     * previously collected object lists to be replaced, and an object key
     * that was absent from the first evaluation no longer triggers a
     * NullPointerException.
     * 
     * @param eval
     *            the evaluation to aggregate
     * @throws RuntimeException
     *             if statistics were already collected and this evaluation
     *             reports a different set of statistic names; nothing is
     *             modified in that case
     */
    public void add(ModelEvaluation<L> eval) {
        Map<String, Double> fold = eval.getStatistics();
        if (!stats.isEmpty() && !fold.keySet().equals(stats.keySet())) {
            throw new java.lang.RuntimeException(
                    "Tried to add incompatible folds, with fields:"
                            + fold.keySet().toString() + " and "
                            + stats.keySet().toString());
        }
        for (Map.Entry<String, Double> e : fold.entrySet()) {
            DescriptiveStatistics ds = stats.get(e.getKey());
            if (ds == null) {
                ds = new DescriptiveStatistics();
                stats.put(e.getKey(), ds);
            }
            ds.addValue(e.getValue());
        }
        for (Map.Entry<String, Object> e : eval.getObjects().entrySet()) {
            List<Object> collected = obj.get(e.getKey());
            if (collected == null) {
                collected = new ArrayList<Object>(5);
                obj.put(e.getKey(), collected);
            }
            collected.add(e.getValue());
        }
    }

    /**
     * Returns the mean of a statistic over all added evaluations.
     * 
     * @param key
     *            the statistic name; must be a name that has been added,
     *            otherwise the lookup yields null and a NullPointerException
     *            is thrown
     * @return the mean of the values recorded for key
     */
    public double getValue(String key) {
       return stats.get(key).getMean();
    }

    /**
     * Renders a table with mean, standard deviation and median per statistic,
     * followed by every collected object grouped by key.
     */
    @Override
    public String toString() {
        StringBuilder buff = new StringBuilder("Stat:\tMean\tStdDev\tMedian\n");
        for (Map.Entry<String, DescriptiveStatistics> e : stats.entrySet()) {
            buff.append(e.getKey() + ":\t" + format(e.getValue()) + "\n");
        }
        for (Map.Entry<String, List<Object>> e : obj.entrySet()) {
            buff.append(e.getKey()).append(":\n");
            for (Object o : e.getValue()) {
                buff.append("----\n").append(o.toString()).append("\n");
            }
        }
        return buff.toString();
    }

    /** Formats mean, standard deviation and 50th percentile, tab separated. */
    private String format(DescriptiveStatistics stats) {
        return String.format("%.4f\t%.4f\t%.4f", stats.getMean(),
                stats.getStandardDeviation(), stats.getPercentile(50));
    }
}


================================================
FILE: src/main/java/com/etsy/conjecture/evaluation/ModelEvaluation.java
================================================
package com.etsy.conjecture.evaluation;

import com.etsy.conjecture.data.Label;

import java.util.Map;

/**
 * Contract for accumulating (real, predicted) label pairs and reporting
 * results as named numeric statistics plus named arbitrary objects.
 *
 * @param <L>
 *            the label type being evaluated
 */
public interface ModelEvaluation<L extends Label> {

    /**
     * Records one observation.
     *
     * @param real
     *            the real label
     * @param predicted
     *            the predicted label
     */
    void add(L real, L predicted);

    /**
     * @return the numeric statistics of this evaluation, keyed by name
     */
    Map<String, Double> getStatistics();

    /**
     * @return the non-numeric results of this evaluation, keyed by name
     */
    Map<String, Object> getObjects();

    /**
     * Merges another evaluation into this one.
     * NOTE(review): presumably combines the observations of both; the exact
     * semantics are defined by the implementations -- confirm there.
     *
     * @param other
     *            the evaluation to merge in
     */
    void merge(ModelEvaluation<L> other);
}


================================================
FILE: src/main/java/com/etsy/conjecture/evaluation/MulticlassConfusionMatrix.java
================================================
package com.etsy.conjecture.evaluation;

import java.io.Serializable;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

/**
 * class representing a confusion matrix for representing misclassification
 * errors.
 * {@link <a href="http://en.wikipedia.org/wiki/Confusion_matrix">Confusion Matrix</a>}
 * 
 * @author jattenberg
 */
public class MulticlassConfusionMatrix implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * The data structure representing the confusion matrix. rows correspond to
     * labels, columns to predictions
     */
    private final SortedMap<String, SortedMap<String, Double>> confusionMatrix;

    /** The num_classes represented in the confusion matrix */
    private final int numClasses;

    /** The number of label / prediction pairs observed */
    double obs;

    /**
     * Instantiates a new confusion matrix.
     * 
     * @param classes
     *            the number of target classes in the problem being considered
     */
    public MulticlassConfusionMatrix(String[] categories) {
        obs = 0;
        this.numClasses = categories.length;
        confusionMatrix = initializeMatrix(categories);
    }

    public void add(MulticlassConfusionMatrix m) {
        obs += m.obs;
        for(Map.Entry<String,SortedMap<String, Double>> entry : m.confusionMatrix.entrySet()) {
            String label = entry.getKey();
            SortedMap<String, Double> value = entry.getValue();
            for(Map.Entry<String,Double> inner_entry : value.entrySet()) {
                String inner_label = inner_entry.getKey();
                Double update = inner_entry.getValue();
                confusionMatrix.get(label)
Download .txt
gitextract__actrptd/

├── .gitignore
├── .travis.yml
├── LICENSE.md
├── README.md
├── bin/
│   ├── demo.sh
│   ├── model_diff.py
│   ├── model_param.py
│   └── prediction_inspection.py
├── build.sbt
├── clients/
│   └── phplib/
│       └── Conjecture/
│           ├── BinaryClassifier.php
│           ├── Config.php
│           ├── ConjectureException.php
│           ├── Finder.php
│           ├── Instance.php
│           ├── MulticlassClassifier.php
│           ├── MulticlassLogisticRegressionClassifier.php
│           ├── MulticlassOneVsAllClassifier.php
│           ├── Text.php
│           ├── TextSequence.php
│           └── Vector.php
├── data/
│   └── iris.tsv
├── project/
│   ├── build.properties
│   └── plugins.sbt
├── sbt
└── src/
    ├── main/
    │   ├── java/
    │   │   └── com/
    │   │       └── etsy/
    │   │           └── conjecture/
    │   │               ├── GenericPair.java
    │   │               ├── PrimitivePair.java
    │   │               ├── Utilities.java
    │   │               ├── data/
    │   │               │   ├── AbstractInstance.java
    │   │               │   ├── BinaryLabel.java
    │   │               │   ├── BinaryLabeledInstance.java
    │   │               │   ├── ByteArrayDoubleHashMap.java
    │   │               │   ├── ClusterLabel.java
    │   │               │   ├── ClusterPrediction.java
    │   │               │   ├── Instance.java
    │   │               │   ├── InstanceFactory.java
    │   │               │   ├── InstanceInterface.java
    │   │               │   ├── Label.java
    │   │               │   ├── LabeledInstance.java
    │   │               │   ├── LazyVector.java
    │   │               │   ├── MulticlassLabel.java
    │   │               │   ├── MulticlassLabeledInstance.java
    │   │               │   ├── MulticlassPrediction.java
    │   │               │   ├── RealValueLabeledInstance.java
    │   │               │   ├── RealValuedLabel.java
    │   │               │   ├── Recommendation.java
    │   │               │   └── StringKeyedVector.java
    │   │               ├── evaluation/
    │   │               │   ├── BinaryModelEvaluation.java
    │   │               │   ├── ConfusionMatrix.java
    │   │               │   ├── EvaluationAggregator.java
    │   │               │   ├── ModelEvaluation.java
    │   │               │   ├── MulticlassConfusionMatrix.java
    │   │               │   ├── MulticlassModelEvaluation.java
    │   │               │   ├── MulticlassReceiverOperatingCharacteristic.java
    │   │               │   ├── ReceiverOperatingCharacteristic.java
    │   │               │   └── RegressionModelEvaluation.java
    │   │               ├── model/
    │   │               │   ├── AdagradOptimizer.java
    │   │               │   ├── ClusteringModel.java
    │   │               │   ├── ControlOptimizer.java
    │   │               │   ├── Decomposable.java
    │   │               │   ├── ElasticNetOptimizer.java
    │   │               │   ├── FTRLOptimizer.java
    │   │               │   ├── Hinge.java
    │   │               │   ├── KMeans.java
    │   │               │   ├── LeastSquaresRegressionModel.java
    │   │               │   ├── LogisticRegression.java
    │   │               │   ├── MIRA.java
    │   │               │   ├── MIRAOptimizer.java
    │   │               │   ├── Model.java
    │   │               │   ├── PassiveAggressiveOptimizer.java
    │   │               │   ├── SGDOptimizer.java
    │   │               │   ├── UpdateableLinearModel.java
    │   │               │   ├── UpdateableModel.java
    │   │               │   └── UpdateableMulticlassLinearModel.java
    │   │               └── topics/
    │   │                   └── lda/
    │   │                       ├── LDADenseTopics.java
    │   │                       ├── LDADict.java
    │   │                       ├── LDADoc.java
    │   │                       ├── LDAPartialSparseTopics.java
    │   │                       ├── LDAPartialTopics.java
    │   │                       ├── LDARandomTopics.java
    │   │                       ├── LDASparseTopics.java
    │   │                       ├── LDATopics.java
    │   │                       └── LDAUtils.java
    │   └── scala/
    │       └── com/
    │           └── etsy/
    │               ├── conjecture/
    │               │   ├── VWReader.scala
    │               │   ├── demo/
    │               │   │   ├── DemoLinearHyperparameterSearch.scala
    │               │   │   ├── IrisDataToMulticlassLabeledInstances.scala
    │               │   │   └── LearnMulticlassClassifier.scala
    │               │   ├── scalding/
    │               │   │   ├── ALSJob.scala
    │               │   │   ├── FastKNN.scala
    │               │   │   ├── LSH.scala
    │               │   │   ├── NNMF.scala
    │               │   │   ├── SVD.scala
    │               │   │   ├── evaluate/
    │               │   │   │   ├── GenericCrossValidator.scala
    │               │   │   │   └── GenericEvaluator.scala
    │               │   │   ├── factorize/
    │               │   │   │   └── FactorizationTools.scala
    │               │   │   ├── train/
    │               │   │   │   ├── AbstractModelTrainer.scala
    │               │   │   │   ├── BinaryModelTrainer.scala
    │               │   │   │   ├── ClusteringModelTrainer.scala
    │               │   │   │   ├── LargeModelTrainer.scala
    │               │   │   │   ├── ModelTrainerStrategy.scala
    │               │   │   │   ├── MulticlassModelTrainer.scala
    │               │   │   │   ├── RegressionModelTrainer.scala
    │               │   │   │   └── SmallModelTrainer.scala
    │               │   │   └── util/
    │               │   │       ├── BaseGridSearcher.scala
    │               │   │       ├── DynamicOptions.scala
    │               │   │       └── HyperparameterSearcher.scala
    │               │   └── text/
    │               │       ├── FeatureHelper.scala
    │               │       ├── Text.scala
    │               │       └── TextSequence.scala
    │               └── scalding/
    │                   └── jobs/
    │                       └── conjecture/
    │                           ├── AdHocClassifier.scala
    │                           ├── AdHocClusterer.scala
    │                           ├── AdHocMulticlassClassifier.scala
    │                           ├── AdHocPredictor.scala
    │                           └── NNMFTest.scala
    └── test/
        └── java/
            └── com/
                └── etsy/
                    └── conjecture/
                        ├── data/
                        │   ├── LazyVectorTest.java
                        │   └── StringKeyedVectorTest.java
                        ├── evaluation/
                        │   └── TestReceiverOperatingCharacteristic.java
                        └── model/
                            └── UpdateableLinearModelTest.java
Download .txt
SYMBOL INDEX (966 symbols across 73 files)

FILE: clients/phplib/Conjecture/BinaryClassifier.php
  class Conjecture_BinaryClassifier (line 3) | class Conjecture_BinaryClassifier {
    method __construct (line 6) | function __construct($param_vec) {
    method dot (line 10) | public function dot($instance_vec) {
    method predict (line 14) | public function predict($instance_vec) {
    method getParams (line 20) | public function getParams() {
    method explain (line 24) | public function explain($instance_vec, $n = 10) {

FILE: clients/phplib/Conjecture/Config.php
  type Conjecture_Config (line 3) | interface Conjecture_Config {
    method useDummyConjectureModel (line 5) | public function useDummyConjectureModel();
    method getConjectureModelPath (line 6) | public function getConjectureModelPath();
    method getMaxFileSize (line 7) | public function getMaxFileSize();

FILE: clients/phplib/Conjecture/ConjectureException.php
  class Conjecture_ConjectureException (line 3) | class Conjecture_ConjectureException extends Exception{}

FILE: clients/phplib/Conjecture/Finder.php
  class Conjecture_Finder (line 3) | class Conjecture_Finder {
    method __construct (line 7) | public function __construct(Conjecture_Config $config) {
    method getLocalModel (line 15) | public function getLocalModel($local_file_path) {
    method parseFile (line 25) | private function parseFile($fp) {
    method getLatestModelJsonForProblem (line 40) | private function getLatestModelJsonForProblem($file_name) {
    method getLatestModelForProblem (line 49) | public function getLatestModelForProblem($file_name) {
    method getLatestBinaryClassificationVectorForProblem (line 54) | public function getLatestBinaryClassificationVectorForProblem($file_na...
    method getLatestBinaryClassifierForProblem (line 59) | public function getLatestBinaryClassifierForProblem($file_name) {
    method getOneVsAllClassifier (line 63) | public function getOneVsAllClassifier($file_name) {
    method getMulticlassClassifier (line 74) | public function getMulticlassClassifier($file_name) {
    method build (line 92) | static function build(Conjecture_Config $config) {
    method getDummyModel (line 101) | private static function getDummyModel() {

FILE: clients/phplib/Conjecture/Instance.php
  class Conjecture_Instance (line 9) | class Conjecture_Instance extends Conjecture_Vector{
    method __construct (line 17) | public function __construct(array $vector = array()) {
    method getId (line 21) | public function getId() {
    method setId (line 25) | public function setId($id) {
    method put (line 30) | public function put($key, $value = 1.0) {
    method update (line 34) | public function update($key, $value = 1.0) {
    method putAll (line 45) | public function putAll(array $vector) {
    method containsKey (line 51) | public function containsKey($key) {
    method containsValue (line 55) | public function containsValue($key) {
    method keySet (line 60) | public function keySet() {
    method values (line 64) | public function values() {
    method size (line 68) | public function size() {
    method isEmpty (line 72) | public function isEmpty() {
    method remove (line 76) | public function remove($key) {
    method toString (line 80) | public function toString() {
    method addTerm (line 84) | public function addTerm($term, $featureWeight = 1.0, $namespace = "") {
    method addTerms (line 90) | public function addTerms(array $terms, $featureWeight = 1.0, $namespac...
    method addNumericArray (line 97) | public function addNumericArray(array $numberValues, $namespace = "") {

FILE: clients/phplib/Conjecture/MulticlassClassifier.php
  class Conjecture_MulticlassClassifier (line 4) | class Conjecture_MulticlassClassifier {
    method __construct (line 11) | function __construct($param) {
    method predict (line 15) | public function predict($instance_vec) {
    method getParams (line 30) | public function getParams() {
    method explain (line 34) | public function explain($instance_vec, $n = 10) {
    method categoryExplain (line 45) | private function categoryExplain($instance_vec, $category_model, $n = ...

FILE: clients/phplib/Conjecture/MulticlassLogisticRegressionClassifier.php
  class Conjecture_MulticlassLogisticRegressionClassifier (line 4) | class Conjecture_MulticlassLogisticRegressionClassifier extends Conjectu...
    method predict (line 8) | public function predict($instance_vec) {

FILE: clients/phplib/Conjecture/MulticlassOneVsAllClassifier.php
  class Conjecture_MulticlassOneVsAllClassifier (line 4) | class Conjecture_MulticlassOneVsAllClassifier {
    method __construct (line 12) | function __construct($param) {
    method predict (line 16) | public function predict($instance_vec) {
    method getParams (line 31) | public function getParams() {
    method explain (line 41) | public function explain($instance_vec, $n = 10) {

FILE: clients/phplib/Conjecture/Text.php
  class Conjecture_Text (line 4) | class Conjecture_Text {
    method build (line 8) | static function build($text) {
    method __construct (line 12) | function __construct($text) {
    method toString (line 16) | function toString() {
    method replaceNumbers (line 20) | function replaceNumbers($replacement = "_num_") {
    method replaceHTMLEscapes (line 25) | function replaceHTMLEscapes($replacement = " ") {
    method removeHTMLTags (line 29) | function removeHTMLTags() {
    method replaceHTMLTags (line 33) | function replaceHTMLTags($replacement = " ") {
    method replaceNonAlphaNumeric (line 37) | function replaceNonAlphaNumeric($replacement = " ") {
    method replaceNonAlphaNumericUnderscore (line 41) | function replaceNonAlphaNumericUnderscore($replacement = " ") {
    method replaceNonAlpha (line 45) | function replaceNonAlpha($replacement = " ") {
    method collapseHyphens (line 49) | function collapseHyphens() {
    method collapseUnderscores (line 53) | function collapseUnderscores() {
    method collapsePeriods (line 57) | function collapsePeriods() {
    method stripPunctuation (line 61) | function stripPunctuation() {
    method collapse (line 67) | function collapse() {
    method rstrip (line 72) | function rstrip() {
    method lstrip (line 77) | function lstrip() {
    method strip (line 82) | function strip() {
    method wsclean (line 87) | function wsclean() {
    method removeUnprintables (line 92) | function removeUnprintables() {
    method collapseWhitespaceAndPunc (line 96) | function collapseWhitespaceAndPunc() {
    method toLowerCase (line 101) | function toLowerCase() {
    method standardTextFilter (line 105) | function standardTextFilter() {
    method toArrayFromShingles (line 115) | function toArrayFromShingles($n) {
    method toSequenceFromShingles (line 127) | function toSequenceFromShingles($n) {

FILE: clients/phplib/Conjecture/TextSequence.php
  class Conjecture_TextSequence (line 4) | class Conjecture_TextSequence {
    method __construct (line 8) | function __construct(array $tokens) {
    method concat (line 15) | function concat($other) {
    method mkString (line 19) | function mkString($glue = " ") {
    method toString (line 23) | function toString() {
    method getTokens (line 27) | function getTokens() {
    method filterBlank (line 31) | function filterBlank() {
    method filterStopwords (line 40) | function filterStopwords() {
    method stopwords (line 49) | function stopwords() {
    method filterBadwords (line 59) | function filterBadwords() {
    method badwords (line 68) | function badwords() {
    method filterAllCaps (line 77) | function filterAllCaps() {
    method AllCaps (line 86) | function AllCaps() {
    method filterCapitalized (line 95) | function filterCapitalized() {
    method capitalized (line 104) | function capitalized() {
    method filterLowercase (line 113) | function filterLowercase() {
    method allLowercase (line 122) | function allLowercase() {
    method filterURLs (line 131) | function filterURLs() {
    method allURLs (line 140) | function allURLs() {
    method filterListings (line 149) | function filterListings() {
    method allListings (line 158) | function allListings() {
    method size (line 167) | function size() {
    method stopWordCount (line 171) | function stopWordCount() {
    method stopWordFraq (line 175) | function stopWordFraq($bins = 10.0) {
    method badWordCount (line 179) | function badWordCount() {
    method badWordFraq (line 183) | function badWordFraq($bins = 10.0) {
    method capsCount (line 187) | function capsCount() {
    method capFraq (line 191) | function capFraq($bins = 10.0) {
    method urlCount (line 195) | function urlCount() {
    method urlFraq (line 199) | function urlFraq($bins = 10.0) {
    method listingsCount (line 203) | function listingsCount() {
    method listingsFraq (line 207) | function listingsFraq($bins = 10.0) {
    method sizeBin (line 211) | function sizeBin() {
    method replaceNumbers (line 217) | function replaceNumbers($replacement = "_num_") {
    method replaceHTMLEscapes (line 226) | function replaceHTMLEscapes($replacement = " ") {
    method removeHTMLTags (line 233) | function removeHTMLTags() {
    method replaceHTMLTags (line 237) | function replaceHTMLTags($replacement = " ") {
    method replaceNonAlphaNumeric (line 244) | function replaceNonAlphaNumeric($replacement = " ") {
    method replaceNonAlphaNumericUnderscore (line 251) | function replaceNonAlphaNumericUnderscore($replacement = " ") {
    method replaceNonAlpha (line 258) | function replaceNonAlpha($replacement = " ") {
    method collapseHyphens (line 265) | function collapseHyphens() {
    method collapseUnderscores (line 272) | function collapseUnderscores() {
    method collapsePeriods (line 279) | function collapsePeriods() {
    method stripPunctuation (line 286) | function stripPunctuation() {
    method collapse (line 295) | function collapse() {
    method rstrip (line 302) | function rstrip() {
    method lstrip (line 309) | function lstrip() {
    method strip (line 317) | function strip() {
    method wsclean (line 322) | function wsclean() {
    method removeUnprintables (line 327) | function removeUnprintables() {
    method collapseWhitespaceAndPunc (line 335) | function collapseWhitespaceAndPunc() {
    method prependNameSpace (line 344) | function prependNameSpace($namespace) {
    method toList (line 351) | function toList() {
    method shingles (line 355) | function shingles($n, $whitespace = "_") {
    method ngrams (line 367) | function ngrams($n, $glue = " ") {
    method unigramsAndBigrams (line 376) | function unigramsAndBigrams($glue = " ") {
    method toInstance (line 380) | function toInstance() {

FILE: clients/phplib/Conjecture/Vector.php
  class Conjecture_Vector (line 3) | class Conjecture_Vector {
    method __construct (line 7) | function __construct($array = array()) {
    method dot (line 11) | public function dot($rhs) {
    method getParams (line 22) | public function getParams() {
    method getParam (line 26) | public function getParam($k) {

FILE: src/main/java/com/etsy/conjecture/GenericPair.java
  class GenericPair (line 6) | public class GenericPair<F, S> implements java.io.Serializable {
    method GenericPair (line 21) | public GenericPair(F first, S second) {
    method getFirst (line 31) | public F getFirst() {
    method setFirst (line 41) | public void setFirst(F first) {
    method getSecond (line 50) | public S getSecond() {
    method setSecond (line 60) | public void setSecond(S second) {
    method toString (line 64) | @Override
    method equals (line 69) | @SuppressWarnings("unchecked")
    method hashCode (line 77) | public int hashCode() {

FILE: src/main/java/com/etsy/conjecture/PrimitivePair.java
  class PrimitivePair (line 8) | public class PrimitivePair implements java.io.Serializable {
    method PrimitivePair (line 21) | public PrimitivePair(double first, double second) {
    method getFirst (line 31) | public double getFirst() {
    method setFirst (line 41) | public void setFirst(double fisrt) {
    method getSecond (line 50) | public double getSecond() {
    method setSecond (line 60) | public void setSecond(double second) {
    method toString (line 64) | @Override
    method equals (line 69) | @Override
    method hashCode (line 77) | @Override

FILE: src/main/java/com/etsy/conjecture/Utilities.java
  class Utilities (line 22) | public class Utilities {
    method Utilities (line 29) | private Utilities() {
    method cleanLine (line 32) | public static String cleanLine(String line) {
    method cleanLineRobust (line 45) | public static String cleanLineRobust(String input, String separator,
    method checkNotBlank (line 68) | public static String checkNotBlank(String s) {
    method checkNotBlank (line 75) | public static List<String> checkNotBlank(List<String> S) {
    method checkNotBlank (line 81) | public static String[] checkNotBlank(String[] S) {
    method stringInnerProduct (line 87) | public static double stringInnerProduct(Map<String, Double> coefficients,
    method sigmoid (line 96) | public static double sigmoid(double operand) {
    method dsigmoid (line 103) | public static double dsigmoid(double operand) {
    method sortTerms (line 113) | public static String sortTerms(String input) {
    method sortTerms (line 117) | public static String sortTerms(String input, String delim) {
    method cleanText (line 123) | public final static String cleanText(String tmp, int maxlen) {
    method grams (line 137) | public final static List<String> grams(String input, int[] gramSizes,
    method floatingPointEquals (line 163) | public static final boolean floatingPointEquals(double a, double b) {
    method doubleHash (line 167) | public static int doubleHash(double d) {
    method logistic (line 172) | public static double logistic(double x) {
    class ValueComparator (line 176) | static class ValueComparator<K, V extends Comparable<? super V>> imple...
      method ValueComparator (line 180) | public ValueComparator(boolean reverse) {
      method compare (line 184) | public int compare(Map.Entry<K, V> a, Map.Entry<K, V> b) {
    method orderKeysByValue (line 190) | public static <K, V extends Comparable<? super V>> ArrayList<K> orderK...
    method orderKeysByValue (line 195) | public static <K, V extends Comparable<? super V>> ArrayList<K> orderK...
    method topKeysByValue (line 207) | public static <K, V extends Comparable<? super V>> List<K> topKeysByVa...

FILE: src/main/java/com/etsy/conjecture/data/AbstractInstance.java
  class AbstractInstance (line 7) | public abstract class AbstractInstance<T extends AbstractInstance<T>> {
    method AbstractInstance (line 16) | public AbstractInstance() {
    method AbstractInstance (line 20) | public AbstractInstance(double weight) {
    method AbstractInstance (line 24) | public AbstractInstance(StringKeyedVector skv) {
    method AbstractInstance (line 28) | public AbstractInstance(StringKeyedVector skv, double weight) {
    method AbstractInstance (line 33) | public AbstractInstance(Map<String, Double> map) {
    method AbstractInstance (line 37) | public AbstractInstance(Map<String, Double> map, double weight) {
    method setWeight (line 42) | @SuppressWarnings("unchecked")
    method getWeight (line 48) | public double getWeight() {
    method getId (line 52) | public String getId() {
    method getVector (line 56) | public StringKeyedVector getVector() {
    method setSupportingData (line 60) | public void setSupportingData(String s) {
    method getSupportingData (line 64) | public String getSupportingData() {
    method setCoordinate (line 68) | @SuppressWarnings("unchecked")
    method addToCoordinate (line 74) | @SuppressWarnings("unchecked")
    method setId (line 80) | @SuppressWarnings("unchecked")
    method addTerm (line 92) | @SuppressWarnings("unchecked")
    method addTerm (line 105) | @SuppressWarnings("unchecked")
    method addTermWithNamespace (line 119) | @SuppressWarnings("unchecked")
    method addTermWithNamespace (line 133) | @SuppressWarnings("unchecked")
    method addTerms (line 148) | @SuppressWarnings("unchecked")
    method addTerms (line 163) | @SuppressWarnings("unchecked")
    method addTermsWithNamespace (line 177) | @SuppressWarnings("unchecked")
    method addTermsWithNamespace (line 194) | @SuppressWarnings("unchecked")
    method addTerms (line 208) | @SuppressWarnings("unchecked")
    method addTerms (line 223) | @SuppressWarnings("unchecked")
    method addTermsWithNamespace (line 237) | @SuppressWarnings("unchecked")
    method addTermsWithNamespace (line 254) | @SuppressWarnings("unchecked")
    method addTermsWithWeights (line 268) | @SuppressWarnings("unchecked")
    method addTermsWithWeightsWithNamespace (line 284) | @SuppressWarnings("unchecked")
    method addNumericArrayWithNamespace (line 301) | @SuppressWarnings("unchecked")
    method addNumericArray (line 315) | @SuppressWarnings("unchecked")
    method addNumericArrayWithNamespace (line 331) | @SuppressWarnings("unchecked")
    method addNumericArray (line 347) | @SuppressWarnings("unchecked")
    method addNumericArrayWithNamespace (line 363) | @SuppressWarnings("unchecked")
    method addNumericArray (line 379) | @SuppressWarnings("unchecked")
    method setNumericArrayWithNamespace (line 395) | @SuppressWarnings("unchecked")
    method setNumericArray (line 409) | @SuppressWarnings("unchecked")
    method setNumericArrayWithNamespace (line 425) | @SuppressWarnings("unchecked")
    method setNumericArray (line 441) | @SuppressWarnings("unchecked")
    method setNumericArrayWithNamespace (line 457) | @SuppressWarnings("unchecked")
    method setNumericArray (line 473) | @SuppressWarnings("unchecked")
    method addIdField (line 487) | @SuppressWarnings("unchecked")
    method addIdField (line 499) | @SuppressWarnings("unchecked")
    method addIdFieldWithNamespace (line 513) | @SuppressWarnings("unchecked")
    method addIdFieldWithNamespace (line 528) | @SuppressWarnings("unchecked")
    method addIdField (line 540) | @SuppressWarnings("unchecked")
    method addIdField (line 552) | @SuppressWarnings("unchecked")
    method addIdFieldWithNamespace (line 566) | @SuppressWarnings("unchecked")
    method addIdFieldWithNamespace (line 581) | @SuppressWarnings("unchecked")
    method addIds (line 593) | @SuppressWarnings("unchecked")
    method addIds (line 607) | @SuppressWarnings("unchecked")
    method addIds (line 619) | @SuppressWarnings("unchecked")
    method addIds (line 633) | @SuppressWarnings("unchecked")
    method addIds (line 647) | @SuppressWarnings("unchecked")
    method addIds (line 662) | @SuppressWarnings("unchecked")
    method addIdsWithNamespace (line 676) | @SuppressWarnings("unchecked")
    method addIdsWithNamespace (line 693) | @SuppressWarnings("unchecked")
    method addIdsWithNamespace (line 707) | @SuppressWarnings("unchecked")
    method addIdsWithNamespace (line 724) | @SuppressWarnings("unchecked")
    method addIdsWithNamespace (line 738) | @SuppressWarnings("unchecked")
    method addIdsWithNamespace (line 755) | @SuppressWarnings("unchecked")
    method setIdField (line 767) | @SuppressWarnings("unchecked")
    method setIdField (line 779) | @SuppressWarnings("unchecked")
    method setIdFieldWithNamespace (line 793) | @SuppressWarnings("unchecked")
    method setIdFieldWithNamespace (line 808) | @SuppressWarnings("unchecked")
    method setIdField (line 820) | @SuppressWarnings("unchecked")
    method setIdField (line 832) | @SuppressWarnings("unchecked")
    method setIdFieldWithNamespace (line 846) | @SuppressWarnings("unchecked")
    method setIdFieldWithNamespace (line 861) | @SuppressWarnings("unchecked")
    method setIds (line 873) | @SuppressWarnings("unchecked")
    method setIds (line 887) | @SuppressWarnings("unchecked")
    method setIds (line 899) | @SuppressWarnings("unchecked")
    method setIds (line 913) | @SuppressWarnings("unchecked")
    method setIds (line 927) | @SuppressWarnings("unchecked")
    method setIds (line 942) | @SuppressWarnings("unchecked")
    method setIdsWithNamespace (line 956) | @SuppressWarnings("unchecked")
    method setIdsWithNamespace (line 973) | @SuppressWarnings("unchecked")
    method setIdsWithNamespace (line 987) | @SuppressWarnings("unchecked")
    method setIdsWithNamespace (line 1004) | @SuppressWarnings("unchecked")
    method setIdsWithNamespace (line 1018) | @SuppressWarnings("unchecked")
    method setIdsWithNamespace (line 1035) | @SuppressWarnings("unchecked")

FILE: src/main/java/com/etsy/conjecture/data/BinaryLabel.java
  class BinaryLabel (line 5) | public class BinaryLabel extends RealValuedLabel {
    method BinaryLabel (line 9) | public BinaryLabel() {
    method BinaryLabel (line 13) | public BinaryLabel(double value) {
    method checkBinaryValue (line 18) | private static double checkBinaryValue(double value) {
    method getAsPlusMinus (line 25) | public double getAsPlusMinus() {

FILE: src/main/java/com/etsy/conjecture/data/BinaryLabeledInstance.java
  class BinaryLabeledInstance (line 9) | public class BinaryLabeledInstance extends
    method getLabel (line 15) | public BinaryLabel getLabel() {
    method BinaryLabeledInstance (line 19) | public BinaryLabeledInstance() {
    method BinaryLabeledInstance (line 23) | public BinaryLabeledInstance(double label, Map<String, Double> instanc...
    method BinaryLabeledInstance (line 27) | public BinaryLabeledInstance(double label, Map<String, Double> instance,
    method BinaryLabeledInstance (line 32) | public BinaryLabeledInstance(double label, StringKeyedVector vec) {
    method BinaryLabeledInstance (line 36) | public BinaryLabeledInstance(double label, StringKeyedVector vec,
    method BinaryLabeledInstance (line 41) | public BinaryLabeledInstance(BinaryLabel label, Map<String, Double> in...
    method BinaryLabeledInstance (line 45) | public BinaryLabeledInstance(BinaryLabel label,
    method BinaryLabeledInstance (line 51) | public BinaryLabeledInstance(BinaryLabel label, StringKeyedVector vec) {
    method BinaryLabeledInstance (line 55) | public BinaryLabeledInstance(BinaryLabel label, StringKeyedVector vec,
    method BinaryLabeledInstance (line 60) | public BinaryLabeledInstance(double label) {
    method BinaryLabeledInstance (line 64) | public BinaryLabeledInstance(double label, double weight) {
    method BinaryLabeledInstance (line 68) | public BinaryLabeledInstance(BinaryLabel label) {
    method BinaryLabeledInstance (line 72) | public BinaryLabeledInstance(BinaryLabel label, double weight) {

FILE: src/main/java/com/etsy/conjecture/data/ByteArrayDoubleHashMap.java
  class ByteArrayDoubleHashMap (line 24) | public class ByteArrayDoubleHashMap implements Serializable, KryoSeriali...
    method ByteArrayDoubleHashMap (line 38) | public ByteArrayDoubleHashMap() {
    method ByteArrayDoubleHashMap (line 42) | public ByteArrayDoubleHashMap(int initialCapacity, float loadFactor,
    method ByteArrayDoubleHashMap (line 47) | public ByteArrayDoubleHashMap(int initialCapacity, float loadFactor,
    method byteArrayToString (line 56) | public String byteArrayToString(byte[] b) {
    method stringToByteArray (line 65) | public byte[] stringToByteArray(String s) {
    class TByteArrayDoubleHashMap (line 78) | static class TByteArrayDoubleHashMap extends TObjectDoubleHashMap<byte...
      method TByteArrayDoubleHashMap (line 79) | public TByteArrayDoubleHashMap(int initialSize, float loadFactor,
      method hash (line 84) | protected int hash(Object obj) {
      method equals (line 88) | protected boolean equals(Object a, Object b) {
      method put (line 94) | public double put(byte[] key, double value) {
      method postInsertHook2 (line 111) | protected final void postInsertHook2(boolean usedFreeSlot) {
    method size (line 129) | public int size() {
    method containsKey (line 133) | public boolean containsKey(Object key) {
    method keySet (line 145) | public Set<String> keySet() {
    method values (line 153) | public Set<Double> values() {
    method containsValue (line 161) | public boolean containsValue(Object d) {
    method entrySet (line 165) | public Set<Map.Entry<String, Double>> entrySet() {
    method isEmpty (line 173) | public boolean isEmpty() {
    method clear (line 177) | public void clear() {
    method remove (line 181) | public Double remove(Object k) {
    method get (line 185) | public Double get(Object k) {
    method put (line 189) | public Double put(String key, Double value) {
    method putAll (line 193) | public void putAll(Map<? extends String, ? extends Double> m) {
    method getPrimitive (line 199) | public double getPrimitive(byte[] key) {
    method getPrimitive (line 203) | public double getPrimitive(String key) {
    method putPrimitive (line 207) | public double putPrimitive(byte[] key, double value) {
    method putPrimitive (line 211) | public double putPrimitive(String key, double value) {
    method removePrimitive (line 215) | public double removePrimitive(byte[] key) {
    method removePrimitive (line 219) | public double removePrimitive(String key) {
    method transformValues (line 223) | public void transformValues(TDoubleFunction func) {
    method troveIterator (line 227) | public TObjectDoubleIterator<byte[]> troveIterator() {
    method iterator (line 231) | public Iterator<Map.Entry<String, Double>> iterator() {
    method writeObject (line 252) | private void writeObject(ObjectOutputStream output) throws IOException {
    method readObject (line 268) | private void readObject(ObjectInputStream input) throws IOException,
    method write (line 287) | public void write(Kryo kryo, Output output) {
    method read (line 303) | public void read(Kryo kryo, Input input) {

FILE: src/main/java/com/etsy/conjecture/data/ClusterLabel.java
  class ClusterLabel (line 3) | public class ClusterLabel extends Label{
    method ClusterLabel (line 9) | public ClusterLabel() {
    method ClusterLabel (line 13) | public ClusterLabel(String label) {
    method getLabel (line 17) | public String getLabel() {
    method setLabel (line 21) | public void setLabel(String label) {
    method toString (line 25) | public String toString() {
    method hashCode (line 29) | @Override
    method equals (line 37) | @Override

FILE: src/main/java/com/etsy/conjecture/data/ClusterPrediction.java
  class ClusterPrediction (line 9) | public class ClusterPrediction extends ClusterLabel{
    method ClusterPrediction (line 18) | public ClusterPrediction(Map<String, Double> clusterProbs) {
    method getMap (line 33) | public Map<String,Double> getMap() {

FILE: src/main/java/com/etsy/conjecture/data/Instance.java
  class Instance (line 6) | public class Instance extends AbstractInstance<Instance> {
    method Instance (line 8) | public Instance() {
    method Instance (line 12) | public Instance(StringKeyedVector vec) {

FILE: src/main/java/com/etsy/conjecture/data/InstanceFactory.java
  class InstanceFactory (line 3) | public class InstanceFactory {
    method InstanceFactory (line 5) | private InstanceFactory() {
    method buildInstance (line 8) | public static Instance buildInstance() {
    method copyInstance (line 12) | public static Instance copyInstance(Instance inst) {
    method toBinaryLabeledInstance (line 16) | public static BinaryLabeledInstance toBinaryLabeledInstance(double label,
    method toBinaryLabeledInstance (line 21) | public static BinaryLabeledInstance toBinaryLabeledInstance(
    method toRealValueLabeledInstance (line 26) | public static RealValueLabeledInstance toRealValueLabeledInstance(
    method toRealValueLabeledInstance (line 31) | public static RealValueLabeledInstance toRealValueLabeledInstance(

FILE: src/main/java/com/etsy/conjecture/data/InstanceInterface.java
  type InstanceInterface (line 7) | public interface InstanceInterface<T extends InstanceInterface<T>> {
    method getId (line 9) | public abstract String getId();
    method setId (line 11) | public abstract T setId(String id);
    method addTerm (line 13) | public abstract T addTerm(String term);
    method addTerm (line 15) | public abstract T addTerm(String term, double featureWeight);
    method addTermWithNamespace (line 17) | public abstract T addTermWithNamespace(String term, String namespace);
    method addTermWithNamespace (line 19) | public abstract T addTermWithNamespace(String term, String namespace,
    method addTerms (line 22) | public abstract T addTerms(Collection<String> terms, double featureWei...
    method addTerms (line 24) | public abstract T addTerms(Collection<String> terms);
    method addTermsWithNamespace (line 26) | public abstract T addTermsWithNamespace(Collection<String> terms,
    method addTermsWithNamespace (line 29) | public abstract T addTermsWithNamespace(Collection<String> terms,
    method addTerms (line 32) | public abstract T addTerms(String[] terms, double featureWeight);
    method addTerms (line 34) | public abstract T addTerms(String[] terms);
    method addTermsWithNamespace (line 36) | public abstract T addTermsWithNamespace(String[] terms, String namespace,
    method addTermsWithNamespace (line 39) | public abstract T addTermsWithNamespace(String[] terms, String namespa...
    method addTermsWithWeights (line 41) | public abstract T addTermsWithWeights(Map<String, Double> termsWithWei...
    method addTermsWithWeightsWithNamespace (line 43) | public abstract T addTermsWithWeightsWithNamespace(
    method addNumericArrayWithNamespace (line 46) | public abstract T addNumericArrayWithNamespace(double[] array,
    method addNumericArray (line 49) | public abstract T addNumericArray(double[] array);
    method addNumericArrayWithNamespace (line 51) | public abstract T addNumericArrayWithNamespace(Double[] array,
    method addNumericArray (line 54) | public abstract T addNumericArray(Double[] array);
    method addNumericArrayWithNamespace (line 56) | public abstract T addNumericArrayWithNamespace(List<Double> values,
    method addNumericArray (line 59) | public abstract T addNumericArray(List<Double> values);
    method setNumericArrayWithNamespace (line 61) | public abstract T setNumericArrayWithNamespace(double[] array,
    method setNumericArray (line 64) | public abstract T setNumericArray(double[] array);
    method setNumericArrayWithNamespace (line 66) | public abstract T setNumericArrayWithNamespace(Double[] array,
    method setNumericArray (line 69) | public abstract T setNumericArray(Double[] array);
    method setNumericArrayWithNamespace (line 71) | public abstract T setNumericArrayWithNamespace(List<Double> values,
    method setNumericArray (line 74) | public abstract T setNumericArray(List<Double> values);
    method addIdField (line 76) | public abstract T addIdField(long id, double featureWeight);
    method addIdField (line 78) | public abstract T addIdField(long id);
    method addIdFieldWithNamespace (line 80) | public abstract T addIdFieldWithNamespace(long id, double featureWeight,
    method addIdFieldWithNamespace (line 83) | public abstract T addIdFieldWithNamespace(long id, String namespace);
    method addIdField (line 85) | public abstract T addIdField(int id, double featureWeight);
    method addIdField (line 87) | public abstract T addIdField(int id);
    method addIdFieldWithNamespace (line 89) | public abstract T addIdFieldWithNamespace(int id, double featureWeight,
    method addIdFieldWithNamespace (line 92) | public abstract T addIdFieldWithNamespace(int id, String namespace);
    method addIds (line 94) | public abstract T addIds(long[] ids, double featureWeight);
    method addIds (line 96) | public abstract T addIds(long[] ids);
    method addIds (line 98) | public abstract T addIds(int[] ids, double featureWeight);
    method addIds (line 100) | public abstract T addIds(int[] ids);
    method addIds (line 102) | public abstract T addIds(Collection<Integer> ids, double featureWeight);
    method addIds (line 104) | public abstract T addIds(Collection<Integer> ids);
    method addIdsWithNamespace (line 106) | public abstract T addIdsWithNamespace(long[] ids, double featureWeight,
    method addIdsWithNamespace (line 109) | public abstract T addIdsWithNamespace(long[] ids, String namespace);
    method addIdsWithNamespace (line 111) | public abstract T addIdsWithNamespace(int[] ids, double featureWeight,
    method addIdsWithNamespace (line 114) | public abstract T addIdsWithNamespace(int[] ids, String namespace);
    method addIdsWithNamespace (line 116) | public abstract T addIdsWithNamespace(Collection<Long> ids,
    method addIdsWithNamespace (line 119) | public abstract T addIdsWithNamespace(Collection<Long> ids, String nam...
    method setIdField (line 121) | public abstract T setIdField(long id, double featureWeight);
    method setIdField (line 123) | public abstract T setIdField(long id);
    method setIdFieldWithNamespace (line 125) | public abstract T setIdFieldWithNamespace(long id, double featureWeight,
    method setIdFieldWithNamespace (line 128) | public abstract T setIdFieldWithNamespace(long id, String namespace);
    method setIdField (line 130) | public abstract T setIdField(int id, double featureWeight);
    method setIdField (line 132) | public abstract T setIdField(int id);
    method setIdFieldWithNamespace (line 134) | public abstract T setIdFieldWithNamespace(int id, double featureWeight,
    method setIdFieldWithNamespace (line 137) | public abstract T setIdFieldWithNamespace(int id, String namespace);
    method setIds (line 139) | public abstract T setIds(long[] ids, double featureWeight);
    method setIds (line 141) | public abstract T setIds(long[] ids);
    method setIds (line 143) | public abstract T setIds(int[] ids, double featureWeight);
    method setIds (line 145) | public abstract T setIds(int[] ids);
    method setIds (line 147) | public abstract T setIds(Collection<Integer> ids, double featureWeight);
    method setIds (line 149) | public abstract T setIds(Collection<Integer> ids);
    method setIdsWithNamespace (line 151) | public abstract T setIdsWithNamespace(long[] ids, double featureWeight,
    method setIdsWithNamespace (line 154) | public abstract T setIdsWithNamespace(long[] ids, String namespace);
    method setIdsWithNamespace (line 156) | public abstract T setIdsWithNamespace(int[] ids, double featureWeight,
    method setIdsWithNamespace (line 159) | public abstract T setIdsWithNamespace(int[] ids, String namespace);
    method setIdsWithNamespace (line 161) | public abstract T setIdsWithNamespace(Collection<Long> ids,
    method setIdsWithNamespace (line 164) | public abstract T setIdsWithNamespace(Collection<Long> ids, String nam...

FILE: src/main/java/com/etsy/conjecture/data/Label.java
  class Label (line 3) | public class Label implements java.io.Serializable {
    method Label (line 7) | public Label() {

FILE: src/main/java/com/etsy/conjecture/data/LabeledInstance.java
  type LabeledInstance (line 3) | public interface LabeledInstance<L extends Label> {
    method getLabel (line 4) | public L getLabel();
    method getVector (line 6) | public StringKeyedVector getVector();
    method getWeight (line 8) | public double getWeight();

FILE: src/main/java/com/etsy/conjecture/data/LazyVector.java
  class LazyVector (line 21) | public class LazyVector extends StringKeyedVector implements Serializable,
    type UpdateFunction (line 35) | public static interface UpdateFunction extends Serializable {
      method lazyUpdate (line 36) | public double lazyUpdate(String key, double param, long startIteration,
    method LazyVector (line 40) | public LazyVector() {
    method LazyVector (line 50) | public LazyVector(UpdateFunction uf) {
    method LazyVector (line 54) | public LazyVector(int initialCapacity, UpdateFunction uf) {
    method LazyVector (line 61) | public LazyVector(StringKeyedVector skv, UpdateFunction uf) {
    method LazyVector (line 71) | public LazyVector(ByteArrayDoubleHashMap map, UpdateFunction uf) {
    method LazyVector (line 78) | public LazyVector(Map<String, Double> jmap, UpdateFunction uf) {
    method incrementIteration (line 85) | public void incrementIteration() {
    method delazify (line 89) | public void delazify() {
    method delazifyCoordinate (line 103) | public double delazifyCoordinate(String key) {
    method delazifyCoordinate (line 107) | public double delazifyCoordinate(byte[] key) {
    method skipToIteration (line 129) | public void skipToIteration(long iter) {
    method setCoordinate (line 143) | public double setCoordinate(String key, double value) {
    method deleteCoordinate (line 156) | public double deleteCoordinate(String key) {
    method getMap (line 165) | public Map<String, Double> getMap() {
    method addToCoordinateInternal (line 169) | protected double addToCoordinateInternal(byte[] bkey, double value) {
    method getCoordinate (line 190) | public double getCoordinate(String key) {
    method size (line 198) | public int size() {
    method containsKey (line 206) | public boolean containsKey(String key) {
    method contains (line 214) | public boolean contains(String key) {
    method keySet (line 221) | public Set<String> keySet() {
    method values (line 229) | public Set<Double> values() {
    method transformValues (line 237) | public void transformValues(TDoubleFunction func) {
    method removeZeroCoordinates (line 245) | public void removeZeroCoordinates() {
    method dot (line 259) | public double dot(StringKeyedVector skv) {
    method dotWithSKV (line 267) | protected double dotWithSKV(StringKeyedVector vec) {
    method dotWithLazy (line 279) | protected double dotWithLazy(LazyVector vec) {
    method LPNorm (line 307) | public double LPNorm(double p) {
    method iterator (line 315) | public Iterator<Map.Entry<String, Double>> iterator() {
    method toString (line 320) | public String toString() {
    method writeReplace (line 325) | private Object writeReplace() throws java.io.ObjectStreamException {
    method writeObject (line 331) | private void writeObject(ObjectOutputStream output) throws IOException {
    method readObject (line 338) | private void readObject(ObjectInputStream input) throws IOException,
    method write (line 350) | public void write(Kryo kryo, Output output) {
    method read (line 358) | public void read(Kryo kryo, Input input) {

FILE: src/main/java/com/etsy/conjecture/data/MulticlassLabel.java
  class MulticlassLabel (line 6) | public class MulticlassLabel extends Label {
    method MulticlassLabel (line 12) | public MulticlassLabel() {
    method MulticlassLabel (line 16) | public MulticlassLabel(String label) {
    method getLabel (line 20) | public String getLabel() {
    method setLabel (line 24) | public void setLabel(String label) {
    method toString (line 28) | public String toString() {
    method toBinaryLabel (line 32) | public BinaryLabel toBinaryLabel(String className) {
    method hashCode (line 36) | @Override
    method equals (line 44) | @Override

FILE: src/main/java/com/etsy/conjecture/data/MulticlassLabeledInstance.java
  class MulticlassLabeledInstance (line 5) | public class MulticlassLabeledInstance extends
    method getLabel (line 11) | public MulticlassLabel getLabel() {
    method MulticlassLabeledInstance (line 15) | public MulticlassLabeledInstance(String label) {
    method MulticlassLabeledInstance (line 19) | public MulticlassLabeledInstance(String label, double weight) {
    method MulticlassLabeledInstance (line 23) | public MulticlassLabeledInstance(String label, Map<String, Double> ins...
    method MulticlassLabeledInstance (line 27) | public MulticlassLabeledInstance(String label,
    method MulticlassLabeledInstance (line 32) | public MulticlassLabeledInstance(String label, StringKeyedVector vec) {
    method MulticlassLabeledInstance (line 36) | public MulticlassLabeledInstance(String label, StringKeyedVector vec,
    method MulticlassLabeledInstance (line 41) | public MulticlassLabeledInstance(MulticlassLabel label) {
    method MulticlassLabeledInstance (line 45) | public MulticlassLabeledInstance(MulticlassLabel label, double weight) {
    method MulticlassLabeledInstance (line 50) | public MulticlassLabeledInstance(MulticlassLabel label,
    method MulticlassLabeledInstance (line 55) | public MulticlassLabeledInstance(MulticlassLabel label,
    method MulticlassLabeledInstance (line 61) | public MulticlassLabeledInstance(MulticlassLabel label,
    method MulticlassLabeledInstance (line 66) | public MulticlassLabeledInstance(MulticlassLabel label,
    method toBinaryInstance (line 71) | public BinaryLabeledInstance toBinaryInstance(String category) {

FILE: src/main/java/com/etsy/conjecture/data/MulticlassPrediction.java
  class MulticlassPrediction (line 9) | public class MulticlassPrediction extends MulticlassLabel {
    method MulticlassPrediction (line 18) | public MulticlassPrediction(Map<String, Double> classProbs) {
    method getProb (line 33) | public Double getProb(String category) {
    method getProbOrElse (line 37) | public Double getProbOrElse(String category, Double def) {
    method getMap (line 45) | public Map<String, Double> getMap() {

FILE: src/main/java/com/etsy/conjecture/data/RealValueLabeledInstance.java
  class RealValueLabeledInstance (line 5) | public class RealValueLabeledInstance extends
    method getLabel (line 11) | public RealValuedLabel getLabel() {
    method RealValueLabeledInstance (line 15) | public RealValueLabeledInstance() {
    method RealValueLabeledInstance (line 19) | public RealValueLabeledInstance(RealValuedLabel label) {
    method RealValueLabeledInstance (line 23) | public RealValueLabeledInstance(RealValuedLabel label, double weight) {
    method RealValueLabeledInstance (line 28) | public RealValueLabeledInstance(double label) {
    method RealValueLabeledInstance (line 32) | public RealValueLabeledInstance(double label, double weight) {
    method RealValueLabeledInstance (line 36) | public RealValueLabeledInstance(double label, Map<String, Double> inst...
    method RealValueLabeledInstance (line 40) | public RealValueLabeledInstance(double label, Map<String, Double> inst...
    method RealValueLabeledInstance (line 45) | public RealValueLabeledInstance(double label, StringKeyedVector vec) {
    method RealValueLabeledInstance (line 49) | public RealValueLabeledInstance(double label, StringKeyedVector vec,
    method RealValueLabeledInstance (line 54) | public RealValueLabeledInstance(RealValuedLabel label,
    method RealValueLabeledInstance (line 59) | public RealValueLabeledInstance(RealValuedLabel label,
    method RealValueLabeledInstance (line 65) | public RealValueLabeledInstance(RealValuedLabel label, StringKeyedVect...
    method RealValueLabeledInstance (line 69) | public RealValueLabeledInstance(RealValuedLabel label,

FILE: src/main/java/com/etsy/conjecture/data/RealValuedLabel.java
  class RealValuedLabel (line 3) | public class RealValuedLabel extends Label {
    method RealValuedLabel (line 8) | public RealValuedLabel(double value) {
    method getValue (line 12) | public Double getValue() {
    method toString (line 16) | @Override

FILE: src/main/java/com/etsy/conjecture/data/Recommendation.java
  class Recommendation (line 5) | public class Recommendation implements Serializable {
    method Recommendation (line 12) | public Recommendation(String id, double score) {

FILE: src/main/java/com/etsy/conjecture/data/StringKeyedVector.java
  class StringKeyedVector (line 14) | public class StringKeyedVector implements Serializable,
    method StringKeyedVector (line 32) | public StringKeyedVector() {
    method StringKeyedVector (line 36) | public StringKeyedVector(int initialCapacity) {
    method StringKeyedVector (line 41) | public StringKeyedVector(StringKeyedVector skv) {
    method StringKeyedVector (line 46) | public StringKeyedVector(Map<String, Double> jmap) {
    method getFreezeKeySet (line 56) | public boolean getFreezeKeySet() {
    method setFreezeKeySet (line 64) | public void setFreezeKeySet(boolean freeze) {
    method setCoordinate (line 72) | public double setCoordinate(String key, double value) {
    method deleteCoordinate (line 84) | public double deleteCoordinate(String key) {
    method getMap (line 92) | public Map<String, Double> getMap() {
    method addToCoordinate (line 99) | public double addToCoordinate(String key, double value) {
    method addToCoordinateInternal (line 104) | protected double addToCoordinateInternal(byte[] bkey, double value) {
    method getCoordinate (line 121) | public double getCoordinate(String key) {
    method addScaled (line 128) | public void addScaled(StringKeyedVector vec, double scale) {
    method multiplyPointwise (line 139) | public StringKeyedVector multiplyPointwise(StringKeyedVector vec) {
    method projectOntoNonZeroCoordinates (line 153) | public StringKeyedVector projectOntoNonZeroCoordinates(StringKeyedVect...
    method size (line 169) | public int size() {
    method containsKey (line 176) | public boolean containsKey(String key) {
    method contains (line 183) | public boolean contains(String key) {
    method keySet (line 190) | public Set<String> keySet() {
    method values (line 197) | public Set<Double> values() {
    method add (line 204) | public void add(StringKeyedVector vec) {
    method sub (line 211) | public void sub(StringKeyedVector vec) {
    method mul (line 218) | public void mul(final double a) {
    method transformValues (line 229) | public void transformValues(TDoubleFunction func) {
    method removeZeroCoordinates (line 236) | public void removeZeroCoordinates() {
    method dot (line 252) | public double dot(StringKeyedVector vec) {
    method LPNorm (line 274) | public double LPNorm(double p) {
    method max (line 285) | public double max() {
    method iterator (line 298) | public Iterator<Map.Entry<String, Double>> iterator() {
    method toString (line 302) | public String toString() {
    method copy (line 311) | public StringKeyedVector copy() {

FILE: src/main/java/com/etsy/conjecture/evaluation/BinaryModelEvaluation.java
  class BinaryModelEvaluation (line 15) | public class BinaryModelEvaluation implements ModelEvaluation<BinaryLabel>,
    method BinaryModelEvaluation (line 22) | public BinaryModelEvaluation() {
    method merge (line 27) | public void merge(ModelEvaluation<BinaryLabel> other) {
    method add (line 33) | public void add(BinaryLabel real, BinaryLabel pred) {
    method add (line 37) | public void add(double label, double prediction) {
    method add (line 42) | public void add(PrimitivePair labelPrediction) {
    method computeAUC (line 47) | public double computeAUC() {
    method computeBrier (line 51) | public double computeBrier() {
    method computeAccy (line 55) | public double computeAccy() {
    method computeAccy (line 59) | public double computeAccy(int dim) {
    method computeFmeasure (line 63) | public double computeFmeasure() {
    method computeFmeasure (line 67) | public double computeFmeasure(int dim) {
    method computePrecision (line 71) | public double computePrecision() {
    method computePrecision (line 75) | public double computePrecision(int dim) {
    method computeRecall (line 79) | public double computeRecall() {
    method computeRecall (line 83) | public double computeRecall(int dim) {
    method getStatistics (line 87) | public Map<String, Double> getStatistics() {
    method getObjects (line 109) | public Map<String, Object> getObjects() {

FILE: src/main/java/com/etsy/conjecture/evaluation/ConfusionMatrix.java
  class ConfusionMatrix (line 16) | public class ConfusionMatrix implements Serializable {
    method ConfusionMatrix (line 38) | public ConfusionMatrix(int classes) {
    method add (line 44) | public void add(ConfusionMatrix m) {
    method ConfusionMatrix (line 61) | public ConfusionMatrix(int classes,
    method ConfusionMatrix (line 76) | public ConfusionMatrix(int classes, PrimitivePair[] labelsAndPredictio...
    method ConfusionMatrix (line 90) | public ConfusionMatrix(int classes, double[] labels, double[] predicti...
    method addInfo (line 109) | public void addInfo(int label, int guess) {
    method addInfo (line 122) | public void addInfo(int label, double[] guess) {
    method addInfo (line 137) | public void addInfo(int label, double[] guess, double freq) {
    method addInfo (line 157) | public void addInfo(double label, double prediction) {
    method addInfo (line 174) | public void addInfo(double[] softlabels, double[] guess) {
    method addInfo (line 193) | public void addInfo(double[] softlabels, double[] guess, double freq) {
    method classDistribution (line 207) | public double[] classDistribution() {
    method classDistribution (line 223) | public double classDistribution(int num) {
    method addHard (line 247) | public void addHard(double[] softlabels, double[] guess, double weight) {
    method addHard (line 260) | public void addHard(double[] softlabels, double[] guess) {
    method addHard (line 274) | public void addHard(int label, double[] guess) {
    method addHard (line 288) | public void addHard(int label, double prediction) {
    method addHard (line 306) | public void addHard(int label, double[] guess, double freq) {
    method softToHard (line 319) | private static double[] softToHard(double[] scores) {
    method toString (line 338) | @Override
    method toStringRowNormalized (line 362) | public String toStringRowNormalized() {
    method toStringColNormalized (line 388) | public String toStringColNormalized() {
    method rowSums (line 413) | public double[] rowSums() {
    method computeAccuracy (line 431) | public double computeAccuracy(int classid) {
    method computeAverageFmeasure (line 441) | public double computeAverageFmeasure() {
    method computeAveragePrecision (line 452) | public double computeAveragePrecision() {
    method computeAverageRecall (line 463) | public double computeAverageRecall() {
    method colSums (line 479) | public double[] colSums() {
    method getMatrix (line 494) | public double[][] getMatrix() {
    method getDim (line 507) | public int getDim() {
    method computeAccuracy (line 517) | public double computeAccuracy() {
    method computePrecision (line 538) | public double[] computePrecision() {
    method computeRecall (line 561) | public double[] computeRecall() {
    method computeFmeasure (line 588) | public double[] computeFmeasure() {
    method getIR (line 607) | public String getIR() {
    method computePrecision (line 630) | public double computePrecision(int dim) {
    method computeRecall (line 645) | public double computeRecall(int dim) {
    method computeFmeasure (line 663) | public double computeFmeasure(int dim) {
    method total (line 676) | private double total(double[] arr) {
    method computeAccuracy (line 693) | public static double computeAccuracy(Collection<PrimitivePair> input) {

FILE: src/main/java/com/etsy/conjecture/evaluation/EvaluationAggregator.java
  class EvaluationAggregator (line 14) | public class EvaluationAggregator<L extends Label> implements Serializab...
    method add (line 20) | public void add(ModelEvaluation<L> eval) {
    method getValue (line 48) | public double getValue(String key) {
    method toString (line 52) | @Override
    method format (line 67) | private String format(DescriptiveStatistics stats) {

FILE: src/main/java/com/etsy/conjecture/evaluation/ModelEvaluation.java
  type ModelEvaluation (line 7) | public interface ModelEvaluation<L extends Label> {
    method add (line 8) | public void add(L real, L predicted);
    method getStatistics (line 10) | public Map<String, Double> getStatistics();
    method getObjects (line 12) | public Map<String, Object> getObjects();
    method merge (line 14) | public void merge(ModelEvaluation<L> other);

FILE: src/main/java/com/etsy/conjecture/evaluation/MulticlassConfusionMatrix.java
  class MulticlassConfusionMatrix (line 16) | public class MulticlassConfusionMatrix implements Serializable {
    method MulticlassConfusionMatrix (line 38) | public MulticlassConfusionMatrix(String[] categories) {
    method add (line 44) | public void add(MulticlassConfusionMatrix m) {
    method initializeMatrix (line 57) | private static SortedMap<String, SortedMap<String, Double>> initialize...
    method initializeMatrix (line 67) | private static SortedMap<String, SortedMap<String, Double>> initialize...
    method getValue (line 80) | private Double getValue(String label, String guess) {
    method updateConfusionMatrix (line 84) | private void updateConfusionMatrix(String label, String guess, double ...
    method initializeProbabilityMatrix (line 88) | private Map<String, Double> initializeProbabilityMatrix() {
    method addInfo (line 104) | public void addInfo(String label, String guess) {
    method addInfo (line 109) | public void addInfo(String label, String guess, double freq) {
    method addInfo (line 122) | public void addInfo(String label, Map<String, Double> guesses) {
    method addInfo (line 137) | public void addInfo(String label, Map<String, Double> predictions,
    method addInfo (line 154) | public void addInfo(Map<String, Double> labels,
    method addInfo (line 169) | public void addInfo(Map<String, Double> labels,
    method classDistribution (line 184) | public Map<String, Double> classDistribution() {
    method classDistribution (line 200) | public double classDistribution(String category) {
    method addHard (line 225) | public void addHard(Map<String, Double> softlabels,
    method addHard (line 239) | public void addHard(Map<String, Double> softlabels,
    method addHard (line 254) | public void addHard(String label, Map<String, Double> guess) {
    method addHard (line 271) | public void addHard(String label, Map<String, Double> guess, double fr...
    method softToHard (line 284) | private static String softToHard(Map<String, Double> scores) {
    method printDebug (line 296) | public String printDebug() {
    method toString (line 305) | public String toString() {
    method toStringRowNormalized (line 329) | public String toStringRowNormalized() {
    method toStringColNormalized (line 357) | public String toStringColNormalized() {
    method rowSums (line 384) | public Map<String, Double> rowSums() {
    method computeAccuracy (line 406) | public double computeAccuracy(String classId) {
    method computeAverageFmeasure (line 420) | public double computeAverageFmeasure() {
    method computeAveragePrecision (line 431) | public double computeAveragePrecision() {
    method computeAverageRecall (line 446) | public double computeAverageRecall() {
    method colSums (line 466) | public Map<String, Double> colSums() {
    method getMatrix (line 482) | public SortedMap<String, SortedMap<String, Double>> getMatrix() {
    method getDim (line 499) | public int getDim() {
    method computeAccuracy (line 509) | public double computeAccuracy() {
    method computePrecision (line 531) | public Map<String, Double> computePrecision() {
    method computeRecall (line 545) | public Map<String, Double> computeRecall() {
    method computeFmeasure (line 561) | public Map<String, Double> computeFmeasure() {
    method getIR (line 585) | public String getIR() {
    method computePrecision (line 609) | public double computePrecision(String category) {
    method computeRecall (line 625) | public double computeRecall(String category) {
    method computeFmeasure (line 643) | public double computeFmeasure(String category) {
    method total (line 656) | private double total(Map<String, Double> probs) {

FILE: src/main/java/com/etsy/conjecture/evaluation/MulticlassModelEvaluation.java
  class MulticlassModelEvaluation (line 17) | public class MulticlassModelEvaluation implements Serializable,
    method MulticlassModelEvaluation (line 29) | public MulticlassModelEvaluation(String[] categories) {
    method add (line 35) | public void add(String label, MulticlassPrediction prediction) {
    method merge (line 40) | public void merge(ModelEvaluation<MulticlassLabel> other) {
    method add (line 46) | public void add(GenericPair<String, MulticlassPrediction> labelPredict...
    method add (line 50) | public void add(MulticlassLabel real, MulticlassLabel pred) {
    method computeAUC (line 58) | public double computeAUC() {
    method computeAUC (line 62) | public double computeAUC(String dim) {
    method computeBrier (line 66) | public double computeBrier() {
    method computeAccy (line 70) | public double computeAccy() {
    method computeAccy (line 74) | public double computeAccy(String dim) {
    method computeFmeasure (line 78) | public double computeFmeasure() {
    method computeFmeasure (line 82) | public double computeFmeasure(String dim) {
    method computePrecision (line 86) | public double computePrecision() {
    method computePrecision (line 90) | public double computePrecision(String dim) {
    method computeRecall (line 94) | public double computeRecall() {
    method computeRecall (line 98) | public double computeRecall(String dim) {
    method computePercent (line 102) | public double computePercent(String dim) {
    method printDebug (line 106) | public String printDebug() {
    method getStatistics (line 110) | public Map<String, Double> getStatistics() {
    method toString (line 132) | public String toString() {
    method getObjects (line 141) | public HashMap<String, Object> getObjects() {

FILE: src/main/java/com/etsy/conjecture/evaluation/MulticlassReceiverOperatingCharacteristic.java
  class MulticlassReceiverOperatingCharacteristic (line 12) | public class MulticlassReceiverOperatingCharacteristic implements Serial...
    method MulticlassReceiverOperatingCharacteristic (line 28) | public MulticlassReceiverOperatingCharacteristic(String[] categories) {
    method add (line 37) | public void add(MulticlassReceiverOperatingCharacteristic other) {
    method add (line 54) | public void add(GenericPair<String, MulticlassPrediction> labelPredict...
    method add (line 58) | public void add(String label, MulticlassPrediction prediction) {
    method multiclassAUC (line 79) | public double multiclassAUC() {
    method singleClassAUC (line 91) | public double singleClassAUC(String category) {
    method multiclassBrierScore (line 95) | public double multiclassBrierScore() {
    method computePercent (line 104) | public double computePercent(String category) {
    method computeAUC (line 108) | public static double computeAUC(
    method computeBrierScore (line 118) | public static double computeBrierScore(

FILE: src/main/java/com/etsy/conjecture/evaluation/ReceiverOperatingCharacteristic.java
  class ReceiverOperatingCharacteristic (line 14) | public class ReceiverOperatingCharacteristic implements Serializable {
    class NumComparator (line 16) | private static class NumComparator implements Comparator<Double>,
      method compare (line 20) | @Override
    method ReceiverOperatingCharacteristic (line 44) | public ReceiverOperatingCharacteristic() {
    method add (line 51) | public void add(ReceiverOperatingCharacteristic r) {
    method increment (line 62) | private void increment(Double key, int[] value) {
    method increment (line 73) | private void increment(Double prediction, double label) {
    method add (line 92) | public void add(double label, double prediction) {
    method add (line 97) | public void add(PrimitivePair pair) {
    method ROC (line 106) | public double[][] ROC() {
    method brierScore (line 138) | public double brierScore() {
    method averagedBrierScore (line 163) | public double averagedBrierScore(int bins) {
    method binaryAUC (line 200) | public double binaryAUC() {
    method trapezoidArea (line 225) | private double trapezoidArea(double x1, double x2, double y1, double y...
    method toString (line 236) | @Override
    method computeAUC (line 253) | public static double computeAUC(
    method computeBrierScore (line 268) | public static double computeBrierScore(

FILE: src/main/java/com/etsy/conjecture/evaluation/RegressionModelEvaluation.java
  class RegressionModelEvaluation (line 12) | public class RegressionModelEvaluation implements
    method add (line 18) | public void add(RealValuedLabel real, RealValuedLabel pred) {
    method merge (line 22) | public void merge(ModelEvaluation<RealValuedLabel> other) {
    method add (line 29) | public void add(double label, double prediction) {
    method add (line 36) | public void add(PrimitivePair labelPrediction) {
    method computeMeanSquaredError (line 40) | public double computeMeanSquaredError() {
    method computeMeanAbsoluteError (line 44) | public double computeMeanAbsoluteError() {
    method getStatistics (line 48) | public HashMap<String, Double> getStatistics() {
    method toString (line 55) | @Override
    method getObjects (line 63) | public HashMap<String, Object> getObjects() {

FILE: src/main/java/com/etsy/conjecture/model/AdagradOptimizer.java
  class AdagradOptimizer (line 12) | public class AdagradOptimizer<L extends Label> extends SGDOptimizer<L> {
    method getUpdate (line 17) | @Override
    method updateAndGetFeatureLearningRate (line 35) | public double updateAndGetFeatureLearningRate(String feature, double g...
    method getFeatureLearningRate (line 52) | public double getFeatureLearningRate(String feature) {
    method lazyUpdate (line 64) | @Override
    method adagradL1 (line 80) | public double adagradL1(String feature, double param, long iter) {
    method teardown (line 92) | @Override

FILE: src/main/java/com/etsy/conjecture/model/ClusteringModel.java
  class ClusteringModel (line 22) | public abstract class ClusteringModel<ClusterLabel extends Label> implem...
    method update (line 31) | public void update(LabeledInstance<ClusterLabel> instance) {
    method update (line 35) | public void update(Collection<LabeledInstance<ClusterLabel>> instances) {
    method update (line 41) | public abstract void update(StringKeyedVector instance);
    method predict (line 43) | public abstract ClusterLabel predict(StringKeyedVector instance);
    method ClusteringModel (line 45) | protected ClusteringModel() {
    method ClusteringModel (line 53) | protected ClusteringModel(HashMap<String, StringKeyedVector> param) {
    method setFreezeFeatureSet (line 65) | public void setFreezeFeatureSet(boolean freeze) {
    method reScale (line 71) | public void reScale(double scale) {
    method merge (line 77) | public void merge(ClusteringModel<ClusterLabel> model, double scale) {
    method setNumClusters (line 83) | public ClusteringModel<ClusterLabel> setNumClusters(int k) {
    method setL1ProjectionErrorTolerance (line 89) | public ClusteringModel<ClusterLabel> setL1ProjectionErrorTolerance(dou...
    method setL1ProjectionBallRadius (line 95) | public ClusteringModel<ClusterLabel> setL1ProjectionBallRadius(double ...
    method decompose (line 101) | public Iterator<Map.Entry<String, Double>> decompose() {
    method setParameter (line 105) | public void setParameter(String name, double value){
    method getEpoch (line 109) | public long getEpoch() {
    method setEpoch (line 113) | public void setEpoch(long epoch) {

FILE: src/main/java/com/etsy/conjecture/model/ControlOptimizer.java
  class ControlOptimizer (line 10) | public class ControlOptimizer<L extends Label> extends SGDOptimizer<L> {
    method getUpdate (line 14) | @Override
    method updateAndGetFeatureLearningRate (line 32) | public double updateAndGetFeatureLearningRate(String feature, double g...
    method getFeatureLearningRate (line 48) | public double getFeatureLearningRate(String feature) {
    method teardown (line 52) | @Override

FILE: src/main/java/com/etsy/conjecture/model/Decomposable.java
  type Decomposable (line 9) | public interface Decomposable {
    method decompose (line 14) | public Iterator<Map.Entry<String, Double>> decompose();
    method setParameter (line 19) | public void setParameter(String name, double value);

FILE: src/main/java/com/etsy/conjecture/model/ElasticNetOptimizer.java
  class ElasticNetOptimizer (line 5) | public class ElasticNetOptimizer<L extends Label> extends SGDOptimizer<L...
    method getUpdate (line 7) | @Override

FILE: src/main/java/com/etsy/conjecture/model/FTRLOptimizer.java
  class FTRLOptimizer (line 16) | public class FTRLOptimizer<L extends Label> extends SGDOptimizer<L> {
    method getUpdate (line 23) | @Override
    method getFeatureLearningRate (line 48) | public double getFeatureLearningRate(String feature, double gradient) {
    method FTRLRegularization (line 57) | public void FTRLRegularization(LabeledInstance<L> instance) {
    method getRegularizedWeight (line 72) | public double getRegularizedWeight(String feature) {
    method lazyUpdate (line 91) | @Override
    method setAlpha (line 96) | public FTRLOptimizer<L> setAlpha(double alpha) {
    method setBeta (line 102) | public FTRLOptimizer<L> setBeta(double beta) {
    method teardown (line 108) | @Override

FILE: src/main/java/com/etsy/conjecture/model/Hinge.java
  class Hinge (line 14) | public class Hinge extends UpdateableLinearModel<BinaryLabel> {
    method Hinge (line 19) | public Hinge(SGDOptimizer optimizer) {
    method Hinge (line 23) | public Hinge(StringKeyedVector param, SGDOptimizer optimizer) {
    method predict (line 27) | @Override
    method loss (line 33) | @Override
    method getGradients (line 45) | @Override
    method getModelType (line 59) | @Override
    method setThreshold (line 64) | public Hinge setThreshold(double threshold) {

FILE: src/main/java/com/etsy/conjecture/model/KMeans.java
  class KMeans (line 20) | public class KMeans extends ClusteringModel<ClusterLabel> {
    method KMeans (line 25) | public KMeans(String[] categories) {
    method KMeans (line 34) | public KMeans(Map<String, StringKeyedVector> centers) {
    method predict (line 42) | public ClusterPrediction predict(StringKeyedVector instance) {
    method update (line 50) | public void update(StringKeyedVector instance) {
    method getCurrent (line 67) | public Double getCurrent(StringKeyedVector center, Double theta) {
    method findTheta (line 78) | public Double findTheta(StringKeyedVector center, Double norm) {
    method doProjection (line 95) | public void doProjection(StringKeyedVector center, Double theta) {
    method l1Projection (line 110) | public void l1Projection(StringKeyedVector center) {

FILE: src/main/java/com/etsy/conjecture/model/LeastSquaresRegressionModel.java
  class LeastSquaresRegressionModel (line 7) | public class LeastSquaresRegressionModel extends
    method LeastSquaresRegressionModel (line 12) | public LeastSquaresRegressionModel(SGDOptimizer optimizer) {
    method LeastSquaresRegressionModel (line 16) | public LeastSquaresRegressionModel(StringKeyedVector param, SGDOptimiz...
    method predict (line 20) | @Override
    method loss (line 25) | @Override
    method getGradients (line 32) | @Override
    method getModelType (line 41) | @Override

FILE: src/main/java/com/etsy/conjecture/model/LogisticRegression.java
  class LogisticRegression (line 11) | public class LogisticRegression extends UpdateableLinearModel<BinaryLabe...
    method LogisticRegression (line 15) | public LogisticRegression(SGDOptimizer optimizer) {
    method LogisticRegression (line 19) | public LogisticRegression(StringKeyedVector param, SGDOptimizer optimi...
    method predict (line 23) | @Override
    method loss (line 28) | @Override
    method getGradients (line 35) | @Override
    method getModelType (line 45) | protected String getModelType() {

FILE: src/main/java/com/etsy/conjecture/model/MIRA.java
  class MIRA (line 8) | public class MIRA extends UpdateableLinearModel<BinaryLabel> {
    method MIRA (line 12) | public MIRA() {
    method MIRA (line 16) | public MIRA(StringKeyedVector param, SGDOptimizer optimizer) {
    method loss (line 20) | @Override
    method predict (line 28) | @Override
    method getGradients (line 34) | @Override
    method getModelType (line 50) | @Override

FILE: src/main/java/com/etsy/conjecture/model/MIRAOptimizer.java
  class MIRAOptimizer (line 9) | public class MIRAOptimizer<L extends Label> extends SGDOptimizer<L> {
    method getUpdate (line 11) | @Override

FILE: src/main/java/com/etsy/conjecture/model/Model.java
  type Model (line 7) | public interface Model<L extends Label> extends Serializable {
    method predict (line 9) | public L predict(StringKeyedVector instance);

FILE: src/main/java/com/etsy/conjecture/model/PassiveAggressiveOptimizer.java
  class PassiveAggressiveOptimizer (line 15) | public class PassiveAggressiveOptimizer extends SGDOptimizer<RealValuedL...
    method getUpdate (line 20) | @Override
    method setC (line 38) | public PassiveAggressiveOptimizer setC(double C) {
    method isHinge (line 44) | public PassiveAggressiveOptimizer isHinge(boolean isHinge) {

FILE: src/main/java/com/etsy/conjecture/model/SGDOptimizer.java
  class SGDOptimizer (line 20) | public abstract class SGDOptimizer<L extends Label> implements LazyVecto...
    method SGDOptimizer (line 32) | public SGDOptimizer() {}
    method SGDOptimizer (line 34) | public SGDOptimizer(double g, double l) {
    method getUpdates (line 42) | public StringKeyedVector getUpdates(Collection<LabeledInstance<L>> min...
    method getUpdate (line 60) | public abstract StringKeyedVector getUpdate(LabeledInstance<L> instance);
    method teardown (line 62) | public void teardown() {
    method lazyUpdate (line 74) | public double lazyUpdate(String feature, double param, long start, lon...
    method getDecreasingLearningRate (line 104) | public double getDecreasingLearningRate(long t){
    method setInitialLearningRate (line 117) | public SGDOptimizer<L> setInitialLearningRate(double rate) {
    method setExamplesPerEpoch (line 123) | public SGDOptimizer<L> setExamplesPerEpoch(double examples) {
    method setUseExponentialLearningRate (line 130) | public SGDOptimizer<L> setUseExponentialLearningRate(boolean useExpone...
    method setExponentialLearningRateBase (line 135) | public SGDOptimizer<L> setExponentialLearningRateBase(double base) {
    method setGaussianRegularizationWeight (line 147) | public SGDOptimizer<L> setGaussianRegularizationWeight(double gaussian) {
    method setLaplaceRegularizationWeight (line 155) | public SGDOptimizer<L> setLaplaceRegularizationWeight(double laplace) {

FILE: src/main/java/com/etsy/conjecture/model/UpdateableLinearModel.java
  class UpdateableLinearModel (line 19) | public abstract class UpdateableLinearModel<L extends Label> implements
    method setArgString (line 40) | public void setArgString(String s) {
    method getArgString (line 44) | public String getArgString() {
    method dotWithParam (line 48) | public double dotWithParam(StringKeyedVector x) {
    method UpdateableLinearModel (line 52) | protected UpdateableLinearModel(SGDOptimizer optimizer) {
    method UpdateableLinearModel (line 59) | protected UpdateableLinearModel(StringKeyedVector param, SGDOptimizer ...
    method getGradients (line 70) | public abstract StringKeyedVector getGradients(LabeledInstance<L> inst...
    method update (line 75) | public void update(Collection<LabeledInstance<L>> instances) {
    method update (line 87) | public void update(LabeledInstance<L> instance) {
    method predict (line 98) | public abstract L predict(StringKeyedVector instance);
    method loss (line 100) | public abstract double loss(LabeledInstance<L> instance);
    method getModelType (line 102) | protected abstract String getModelType();
    method decompose (line 104) | public Iterator<Map.Entry<String, Double>> decompose() {
    method setParameter (line 108) | public void setParameter(String name, double value) {
    method getParam (line 112) | public StringKeyedVector getParam() {
    method reScale (line 116) | public void reScale(double scale) {
    method setFreezeFeatureSet (line 120) | public void setFreezeFeatureSet(boolean freeze) {
    method merge (line 124) | public void merge(UpdateableLinearModel<L> model, double scaling) {
    method teardown (line 129) | public void teardown() {
    method truncate (line 136) | public void truncate(LabeledInstance<L> instance) {
    method applyTruncation (line 142) | public void applyTruncation(StringKeyedVector instance) {
    method getEpoch (line 162) | public long getEpoch() {
    method setEpoch (line 166) | public void setEpoch(long e) {
    method setTruncationPeriod (line 170) | public UpdateableLinearModel<L> setTruncationPeriod(int period) {
    method setTruncationThreshold (line 177) | public UpdateableLinearModel<L> setTruncationThreshold(double threshol...
    method setTruncationUpdate (line 184) | public UpdateableLinearModel<L> setTruncationUpdate(double update) {
    method compareTo (line 191) | @Override
    method thresholdParameters (line 196) | public void thresholdParameters(double t) {
    method explainPrediction (line 205) | public String explainPrediction(StringKeyedVector x) {
    method explainPrediction (line 209) | public String explainPrediction(StringKeyedVector x, int n) {

FILE: src/main/java/com/etsy/conjecture/model/UpdateableModel.java
  type UpdateableModel (line 8) | public interface UpdateableModel<L extends Label, M extends UpdateableMo...
    method update (line 11) | public void update(LabeledInstance<L> instance);
    method update (line 14) | public void update(Collection<LabeledInstance<L>> instances);
    method merge (line 17) | public void merge(M model, double weight);
    method reScale (line 20) | public void reScale(double scale);
    method setFreezeFeatureSet (line 23) | public void setFreezeFeatureSet(boolean freeze);
    method setEpoch (line 26) | public void setEpoch(long epoch);
    method getEpoch (line 28) | public long getEpoch();

FILE: src/main/java/com/etsy/conjecture/model/UpdateableMulticlassLinearModel.java
  class UpdateableMulticlassLinearModel (line 24) | public class UpdateableMulticlassLinearModel implements
    method UpdateableMulticlassLinearModel (line 37) | public UpdateableMulticlassLinearModel(Map<String, UpdateableLinearMod...
    method setArgString (line 43) | public void setArgString(String s) {
    method getArgString (line 47) | public String getArgString() {
    method setModelType (line 51) | public void setModelType(String modelType) {
    method getModelType (line 55) | public String getModelType() {
    method decompose (line 59) | public Iterator<Map.Entry<String, Double>> decompose() {
    method setParameter (line 63) | public void setParameter(String name, double value) {
    method reScale (line 67) | public void reScale(double scale) {
    method setFreezeFeatureSet (line 73) | public void setFreezeFeatureSet(boolean freeze) {
    method update (line 82) | public void update(Collection<LabeledInstance<MulticlassLabel>> instan...
    method update (line 91) | public void update(LabeledInstance<MulticlassLabel> instance) {
    method predict (line 102) | @Override
    method merge (line 120) | public void merge(UpdateableMulticlassLinearModel model, double scale) {
    method teardown (line 127) | public void teardown() {
    method getEpoch (line 133) | public long getEpoch() {
    method setEpoch (line 137) | public void setEpoch(long e) {
    method compareTo (line 142) | @Override
    method thresholdParameters (line 147) | public void thresholdParameters(double t) {
    method explainPrediction (line 158) | public String explainPrediction(StringKeyedVector x) {
    method explainPrediction (line 162) | public String explainPrediction(StringKeyedVector x, int n) {

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDADenseTopics.java
  class LDADenseTopics (line 6) | public class LDADenseTopics implements LDATopics, Serializable {
    method LDADenseTopics (line 15) | public LDADenseTopics(double[][] topic_prob) {
    method setTopicProb (line 21) | public void setTopicProb(int topic, double[] prob) {
    method setDict (line 25) | public void setDict(LDADict dict_) throws Exception {
    method getDict (line 34) | public LDADict getDict() {
    method wordProb (line 38) | public double wordProb(int word, int topic) {
    method numTopics (line 42) | public int numTopics() {
    method dictSize (line 46) | public int dictSize() {
    method toString (line 50) | public String toString() {

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDADict.java
  class LDADict (line 8) | public class LDADict implements Serializable {
    method LDADict (line 14) | public LDADict(Set<String> unique_words) {
    method word (line 23) | public String word(int index) {
    method index (line 27) | public int index(String word) {
    method size (line 31) | public int size() {
    method contains (line 35) | public boolean contains(String word) {
    method toString (line 39) | public String toString() {

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDADoc.java
  class LDADoc (line 11) | public class LDADoc implements Serializable {
    method LDADoc (line 21) | public LDADoc(Map<String, Double> word_counts, LDADict dict) {
    method topicProportions (line 40) | public double[] topicProportions() {
    method wordCount (line 44) | public double wordCount() {
    method updateTopicProportions (line 48) | public void updateTopicProportions(LDATopics topics, double alpha) {
    method toPartialTopics (line 99) | public LDAPartialTopics toPartialTopics() throws Exception {
    method toPartialTopic (line 107) | public LDAPartialTopics toPartialTopic(int topic) throws Exception {
    method toPartialSparseTopics (line 119) | public LDAPartialSparseTopics toPartialSparseTopics(int n) throws Exce...

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDAPartialSparseTopics.java
  class LDAPartialSparseTopics (line 7) | public class LDAPartialSparseTopics implements Serializable {
    method LDAPartialSparseTopics (line 13) | public LDAPartialSparseTopics(int K, Map<Integer, Double> phi) {
    method merge (line 18) | public LDAPartialSparseTopics merge(LDAPartialSparseTopics rhs)
    method toTopics (line 37) | public LDASparseTopics toTopics() {

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDAPartialTopics.java
  class LDAPartialTopics (line 5) | public class LDAPartialTopics implements Serializable {
    method LDAPartialTopics (line 11) | public LDAPartialTopics(int[] word_index, double[][] phi) {
    method countUniqueWords (line 16) | private int countUniqueWords(LDAPartialTopics rhs) {
    method merge (line 42) | public LDAPartialTopics merge(LDAPartialTopics rhs) throws Exception {
    method toString (line 100) | public String toString() {
    method toTopicVectors (line 112) | public double[][] toTopicVectors() {
    method toTopicVector (line 131) | public double[] toTopicVector() throws Exception {
    method toTopics (line 140) | public LDADenseTopics toTopics() {
    method main (line 144) | public static void main(String[] argv) throws Exception {

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDARandomTopics.java
  class LDARandomTopics (line 6) | public class LDARandomTopics implements LDATopics, Serializable {
    method LDARandomTopics (line 14) | public LDARandomTopics(LDADict dict, int num_topics) {
    method wordProb (line 20) | public double wordProb(int word, int topic) {
    method numTopics (line 31) | public int numTopics() {
    method dictSize (line 35) | public int dictSize() {
    method getDict (line 39) | public LDADict getDict() {
    method setDict (line 43) | public void setDict(LDADict d) {

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDASparseTopics.java
  class LDASparseTopics (line 6) | public class LDASparseTopics implements LDATopics, Serializable {
    method LDASparseTopics (line 13) | public LDASparseTopics(int K, Map<Integer, Double> prob) {
    method setDict (line 18) | public void setDict(LDADict dict_) {
    method getDict (line 22) | public LDADict getDict() {
    method wordProb (line 26) | public double wordProb(int word, int topic) {
    method numTopics (line 35) | public int numTopics() {
    method dictSize (line 39) | public int dictSize() {

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDATopics.java
  type LDATopics (line 5) | public interface LDATopics extends Serializable {
    method setDict (line 7) | public void setDict(LDADict dict) throws Exception;
    method getDict (line 9) | public LDADict getDict();
    method wordProb (line 11) | public double wordProb(int word, int topic);
    method numTopics (line 13) | public int numTopics();
    method dictSize (line 15) | public int dictSize();

FILE: src/main/java/com/etsy/conjecture/topics/lda/LDAUtils.java
  class LDAUtils (line 5) | public class LDAUtils implements Serializable {
    method digamma (line 9) | public static double digamma(double x) {
    method logSumExp (line 26) | public static double logSumExp(double a, double b) {

FILE: src/test/java/com/etsy/conjecture/data/LazyVectorTest.java
  class LazyVectorTest (line 20) | public class LazyVectorTest {
    method lazyUpdate (line 29) | public double lazyUpdate(String k, double p, long a, long b) {
    method buildLV (line 35) | public LazyVector buildLV() {
    method testCoordinates (line 48) | @Test
    method testCoordinatesLazy (line 62) | @Test
    method testAddScaledToSKV (line 82) | @Test
    method testAddScaledToLV (line 100) | @Test
    method testAddScaledToSelf (line 122) | @Test
    method testAddScaledSKVToLV (line 136) | @Test
    method testDotProduct (line 157) | @Test
    method testFreezing (line 171) | @Test
    method testJavaSerialization (line 187) | @Test
    method testKryoSerialization (line 213) | @Test
    method testGson (line 240) | @Test

FILE: src/test/java/com/etsy/conjecture/data/StringKeyedVectorTest.java
  class StringKeyedVectorTest (line 20) | public class StringKeyedVectorTest {
    method buildSKV (line 25) | public StringKeyedVector buildSKV() {
    method testCoordinates (line 38) | @Test
    method testAddScaled (line 52) | @Test
    method testDotProduct (line 68) | @Test
    method testFreezing (line 81) | @Test
    method testJavaSerialization (line 97) | @Test
    method testKryoSerialization (line 121) | @Test
    method testGson (line 147) | @Test

FILE: src/test/java/com/etsy/conjecture/evaluation/TestReceiverOperatingCharacteristic.java
  class TestReceiverOperatingCharacteristic (line 7) | public class TestReceiverOperatingCharacteristic {
    method testAUC (line 18) | @Test

FILE: src/test/java/com/etsy/conjecture/model/UpdateableLinearModelTest.java
  class UpdateableLinearModelTest (line 11) | public class UpdateableLinearModelTest {
    method getPositiveInstance (line 16) | BinaryLabeledInstance getPositiveInstance() {
    method getNegativeInstance (line 23) | BinaryLabeledInstance getNegativeInstance() {
    method testLogisticRegressionBasic (line 30) | @Test
    method testLogisticRegressionLaplaceRegularization (line 45) | @Test
    method testLogisticRegressionGaussianRegularization (line 66) | @Test
    method testPerceptronBasic (line 81) | @Test
    method testInstanceNotModified (line 96) | public void testInstanceNotModified(UpdateableLinearModel model) {
    method testInstanceNotModifiedByOptimizer (line 104) | @Test
    method testInstanceNotModifiedByModel (line 122) | @Test
Condensed preview — 117 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (491K chars).
[
  {
    "path": ".gitignore",
    "chars": 240,
    "preview": "*.class\n*.log\n*.swp\n*.swo\n\n# sbt specific\ndist/*\ntarget/\nlib_managed/\nsrc_managed/\nproject/boot/\nproject/plugins/project"
  },
  {
    "path": ".travis.yml",
    "chars": 51,
    "preview": "sudo: false\nlanguage: scala\nscript:\n    - sbt +test"
  },
  {
    "path": "LICENSE.md",
    "chars": 1091,
    "preview": "The MIT License\n===============\n\nCopyright (c) 2009 Anton Grigoryev\n\nPermission is hereby granted, free of charge, to an"
  },
  {
    "path": "README.md",
    "chars": 5277,
    "preview": "# Conjecture [![Build Status](https://travis-ci.org/etsy/Conjecture.svg?branch=master)](https://travis-ci.org/etsy/Conje"
  },
  {
    "path": "bin/demo.sh",
    "chars": 575,
    "preview": "#!/bin/bash\n\n# - make monolithic conjecture jar.\nsbt clean assembly\n# - make the instances.\njava -cp target/conjecture-a"
  },
  {
    "path": "bin/model_diff.py",
    "chars": 612,
    "preview": "import json\nimport sys\nimport math\n\nif __name__ == '__main__':\n  if len(sys.argv) != 3:\n    sys.exit(\"Usage: python \" + "
  },
  {
    "path": "bin/model_param.py",
    "chars": 315,
    "preview": "import json\nimport sys\nimport math\n\nif __name__ == '__main__':\n  if len(sys.argv) != 2:\n    sys.exit(\"Usage: python \" + "
  },
  {
    "path": "bin/prediction_inspection.py",
    "chars": 2594,
    "preview": "import json\nimport sys\nfrom optparse import OptionParser\nfrom math import floor\n\ncolors = [\"FF0000\", \"FF1000\", \"FF2000\","
  },
  {
    "path": "build.sbt",
    "chars": 4091,
    "preview": "import sbt._\n\nname := \"conjecture\"\n\nversion := \"0.3.1-SNAPSHOT\"\n\norganization := \"com.etsy\"\n\nscalaVersion := \"2.11.11\"\nc"
  },
  {
    "path": "clients/phplib/Conjecture/BinaryClassifier.php",
    "chars": 943,
    "preview": "<?php\n\nclass Conjecture_BinaryClassifier {\n    private $param = null;\n\n    function __construct($param_vec) {\n        $t"
  },
  {
    "path": "clients/phplib/Conjecture/Config.php",
    "chars": 170,
    "preview": "<?php\n\ninterface Conjecture_Config {\n\n    public function useDummyConjectureModel();\n    public function getConjectureMo"
  },
  {
    "path": "clients/phplib/Conjecture/ConjectureException.php",
    "chars": 63,
    "preview": "<?php\n\nclass Conjecture_ConjectureException extends Exception{}"
  },
  {
    "path": "clients/phplib/Conjecture/Finder.php",
    "chars": 3817,
    "preview": "<?php\n\nclass Conjecture_Finder {\n\n    private $config = null;\n\n    public function __construct(Conjecture_Config $config"
  },
  {
    "path": "clients/phplib/Conjecture/Instance.php",
    "chars": 2610,
    "preview": "<?php\n\n  /**\n   * container class representing instances that are considered\n   * as input to predictive models in Conje"
  },
  {
    "path": "clients/phplib/Conjecture/MulticlassClassifier.php",
    "chars": 1520,
    "preview": "<?php\n\n\nclass Conjecture_MulticlassClassifier {\n\n    private $param = null;\n\n    /**\n     * each param is a Conjecture_V"
  },
  {
    "path": "clients/phplib/Conjecture/MulticlassLogisticRegressionClassifier.php",
    "chars": 595,
    "preview": "<?php\n\n\nclass Conjecture_MulticlassLogisticRegressionClassifier extends Conjecture_MulticlassClassifier {\n\n    private $"
  },
  {
    "path": "clients/phplib/Conjecture/MulticlassOneVsAllClassifier.php",
    "chars": 1251,
    "preview": "<?php\n\n\nclass Conjecture_MulticlassOneVsAllClassifier {\n\n    private $param = null;\n\n    /**\n     * $param is an array t"
  },
  {
    "path": "clients/phplib/Conjecture/Text.php",
    "chars": 4006,
    "preview": "<?php\n\n// This is bascially an exact replica of com.etsy.conjecture.text.Text\nclass Conjecture_Text {\n\n    private $inpu"
  },
  {
    "path": "clients/phplib/Conjecture/TextSequence.php",
    "chars": 21201,
    "preview": "<?php\n\n// This is bascially an exact replica of com.etsy.conjecture.text.TextSequence\nclass Conjecture_TextSequence {\n\n "
  },
  {
    "path": "clients/phplib/Conjecture/Vector.php",
    "chars": 673,
    "preview": "<?php \n\nclass Conjecture_Vector {\n\n    protected $vector = null;\n\n    function __construct($array = array()) {\n        $"
  },
  {
    "path": "data/iris.tsv",
    "chars": 4550,
    "preview": "7.0\t3.2\t4.7\t1.4\tIris-versicolor\n5.6\t3.0\t4.1\t1.3\tIris-versicolor\n5.4\t3.4\t1.7\t0.2\tIris-setosa\n5.0\t3.0\t1.6\t0.2\tIris-setosa\n"
  },
  {
    "path": "project/build.properties",
    "chars": 19,
    "preview": "sbt.version=0.13.9\n"
  },
  {
    "path": "project/plugins.sbt",
    "chars": 230,
    "preview": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.13.0\")\n\naddSbtPlugin(\"no.arktekk.sbt\" % \"aether-deploy\" % \"0.14\")\n\nadd"
  },
  {
    "path": "sbt",
    "chars": 19258,
    "preview": "#!/usr/bin/env bash\n#\n# A more capable sbt runner, coincidentally also called sbt.\n# Author: Paul Phillips <paulp@improv"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/GenericPair.java",
    "chars": 1657,
    "preview": "package com.etsy.conjecture;\n\n/**\n * @author Diane Hu\n */\npublic class GenericPair<F, S> implements java.io.Serializable"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/PrimitivePair.java",
    "chars": 1740,
    "preview": "package com.etsy.conjecture;\n\n/**\n * PrimitivePair is JavaBean\n * \n * @author Josh Attenberg\n */\npublic class PrimitiveP"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/Utilities.java",
    "chars": 6706,
    "preview": "package com.etsy.conjecture;\n\nimport java.util.ArrayList;\nimport java.util.Arrays;\nimport java.util.Collection;\nimport j"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/AbstractInstance.java",
    "chars": 25530,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.util.Collection;\nimport java.util.List;\nimport java.util.Map;\n\npublic abs"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/BinaryLabel.java",
    "chars": 658,
    "preview": "package com.etsy.conjecture.data;\n\nimport static com.google.common.base.Preconditions.checkArgument;\n\npublic class Binar"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/BinaryLabeledInstance.java",
    "chars": 2161,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.util.Map;\n\n/**\n * TODO: when using method string all methods return a Rea"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/ByteArrayDoubleHashMap.java",
    "chars": 9997,
    "preview": "package com.etsy.conjecture.data;\n\nimport gnu.trove.function.TDoubleFunction;\nimport gnu.trove.iterator.TObjectDoubleIte"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/ClusterLabel.java",
    "chars": 1163,
    "preview": "package com.etsy.conjecture.data;\n\npublic class ClusterLabel extends Label{\n\n    private static final long serialVersion"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/ClusterPrediction.java",
    "chars": 954,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.util.Map;\nimport com.google.common.collect.Maps;\n\n/**\n * Representing a p"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/Instance.java",
    "chars": 300,
    "preview": "package com.etsy.conjecture.data;\n\n\n//TODO: reset methods for string adders\n//TODO: for instance, vector subtraction?\npu"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/InstanceFactory.java",
    "chars": 1093,
    "preview": "package com.etsy.conjecture.data;\n\npublic class InstanceFactory {\n\n    private InstanceFactory() {\n    };\n\n    public st"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/InstanceInterface.java",
    "chars": 5388,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.util.Collection;\nimport java.util.List;\nimport java.util.Map;\n\npublic int"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/Label.java",
    "chars": 173,
    "preview": "package com.etsy.conjecture.data;\n\npublic class Label implements java.io.Serializable {\n\n    private static final long s"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/LabeledInstance.java",
    "chars": 189,
    "preview": "package com.etsy.conjecture.data;\n\npublic interface LabeledInstance<L extends Label> {\n    public L getLabel();\n\n    pub"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/LazyVector.java",
    "chars": 11522,
    "preview": "package com.etsy.conjecture.data;\n\nimport gnu.trove.function.TDoubleFunction;\nimport gnu.trove.iterator.TObjectDoubleIte"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/MulticlassLabel.java",
    "chars": 1390,
    "preview": "package com.etsy.conjecture.data;\n\n/**\n * representing a 100% probability of membership in a particular class\n */\npublic"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/MulticlassLabeledInstance.java",
    "chars": 2326,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.util.Map;\n\npublic class MulticlassLabeledInstance extends\n        Abstrac"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/MulticlassPrediction.java",
    "chars": 1259,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.util.Map;\nimport com.google.common.collect.Maps;\n\n/**\n * representing a p"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/RealValueLabeledInstance.java",
    "chars": 2122,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.util.Map;\n\npublic class RealValueLabeledInstance extends\n        Abstract"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/RealValuedLabel.java",
    "chars": 405,
    "preview": "package com.etsy.conjecture.data;\n\npublic class RealValuedLabel extends Label {\n\n    protected final Double value;\n    p"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/Recommendation.java",
    "chars": 345,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.io.Serializable;\n\npublic class Recommendation implements Serializable {\n\n"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/data/StringKeyedVector.java",
    "chars": 8938,
    "preview": "package com.etsy.conjecture.data;\n\nimport gnu.trove.function.TDoubleFunction;\nimport gnu.trove.iterator.TObjectDoubleIte"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/BinaryModelEvaluation.java",
    "chars": 3199,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.HashMap;\nimport java.util.Map;\nim"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/ConfusionMatrix.java",
    "chars": 22418,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.Collection;\n\nimport com.etsy.conj"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/EvaluationAggregator.java",
    "chars": 2760,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.ArrayList;\nimport java.util.HashM"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/ModelEvaluation.java",
    "chars": 345,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport com.etsy.conjecture.data.Label;\n\nimport java.util.Map;\n\npublic interface"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/MulticlassConfusionMatrix.java",
    "chars": 22734,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.Map;\nimport java.util.Set;\nimport"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/MulticlassModelEvaluation.java",
    "chars": 4425,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.HashMap;\nimport java.util.Map;\nim"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/MulticlassReceiverOperatingCharacteristic.java",
    "chars": 4799,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.Collection;\nimport java.util.Map;"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/ReceiverOperatingCharacteristic.java",
    "chars": 7289,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.ArrayList;\nimport java.util.Colle"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/evaluation/RegressionModelEvaluation.java",
    "chars": 2056,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport java.io.Serializable;\nimport java.util.HashMap;\n\nimport com.etsy.conject"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/AdagradOptimizer.java",
    "chars": 3564,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.*;\nimport com.etsy.conjecture.data.*;\n\nimport java.util.*"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/ClusteringModel.java",
    "chars": 3657,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.LabeledInstance;\nimport com.etsy.conjecture.data.Clu"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/ControlOptimizer.java",
    "chars": 1928,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.*;\n\nimport java.util.*;\n\n/**\n *  Current search ads "
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/Decomposable.java",
    "chars": 465,
    "preview": "package com.etsy.conjecture.model;\n\nimport java.util.Iterator;\nimport java.util.Map;\n\n/**\n * Type of model to be used wi"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/ElasticNetOptimizer.java",
    "chars": 478,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.*;\n\npublic class ElasticNetOptimizer<L extends Label"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/FTRLOptimizer.java",
    "chars": 4143,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.LazyVector;\nimport com.etsy.conjecture.data.StringKe"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/Hinge.java",
    "chars": 2000,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.Utilities;\nimport com.etsy.conjecture.data.BinaryLabel;\ni"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/KMeans.java",
    "chars": 3825,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.Utilities;\nimport com.etsy.conjecture.data.ClusterLabel;\n"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/LeastSquaresRegressionModel.java",
    "chars": 1452,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.LabeledInstance;\nimport com.etsy.conjecture.data.Rea"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/LogisticRegression.java",
    "chars": 1558,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.Utilities;\nimport com.etsy.conjecture.data.BinaryLabel;\ni"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/MIRA.java",
    "chars": 1685,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.Utilities;\nimport com.etsy.conjecture.data.BinaryLabel;\ni"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/MIRAOptimizer.java",
    "chars": 392,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.*;\n\n/**\n *  MIRA takes care of the full update. This"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/Model.java",
    "chars": 273,
    "preview": "package com.etsy.conjecture.model;\n\nimport java.io.Serializable;\nimport com.etsy.conjecture.data.Label;\nimport com.etsy."
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/PassiveAggressiveOptimizer.java",
    "chars": 1695,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.LazyVector;\nimport com.etsy.conjecture.data.StringKe"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/SGDOptimizer.java",
    "chars": 5829,
    "preview": "package com.etsy.conjecture.model;\n\nimport com.etsy.conjecture.data.LazyVector;\nimport com.etsy.conjecture.Utilities;\nim"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/UpdateableLinearModel.java",
    "chars": 7096,
    "preview": "package com.etsy.conjecture.model;\n\nimport static com.google.common.base.Preconditions.checkArgument;\nimport gnu.trove.f"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/UpdateableModel.java",
    "chars": 926,
    "preview": "package com.etsy.conjecture.model;\n\nimport java.util.Collection;\n\nimport com.etsy.conjecture.data.Label;\nimport com.etsy"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/model/UpdateableMulticlassLinearModel.java",
    "chars": 5275,
    "preview": "package com.etsy.conjecture.model;\n\nimport static com.google.common.base.Preconditions.checkArgument;\nimport gnu.trove.f"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDADenseTopics.java",
    "chars": 1833,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\nimport java.util.Random;\n\npublic class LDADenseTop"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDADict.java",
    "chars": 1008,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\nimport java.util.ArrayList;\nimport java.util.HashM"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDADoc.java",
    "chars": 5398,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport com.etsy.conjecture.Utilities;\n\nimport java.io.Serializable;\nimport java"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDAPartialSparseTopics.java",
    "chars": 1606,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\nimport java.util.Map;\nimport java.util.Set;\n\npubli"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDAPartialTopics.java",
    "chars": 5809,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\n\npublic class LDAPartialTopics implements Serializ"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDARandomTopics.java",
    "chars": 1226,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\nimport java.util.Random;\n\npublic class LDARandomTo"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDASparseTopics.java",
    "chars": 878,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\nimport java.util.Map;\n\npublic class LDASparseTopic"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDATopics.java",
    "chars": 319,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\n\npublic interface LDATopics extends Serializable {"
  },
  {
    "path": "src/main/java/com/etsy/conjecture/topics/lda/LDAUtils.java",
    "chars": 1071,
    "preview": "package com.etsy.conjecture.topics.lda;\n\nimport java.io.Serializable;\n\npublic class LDAUtils implements Serializable {\n\n"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/VWReader.scala",
    "chars": 3155,
    "preview": "package com.etsy.conjecture\n\nimport cascading.pipe.Pipe\nimport cascading.flow.FlowDef\nimport com.twitter.scalding._\nimpo"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/demo/DemoLinearHyperparameterSearch.scala",
    "chars": 2363,
    "preview": "package com.etsy.scalding.jobs.conjecture\n\nimport scala.util.Random\nimport com.etsy.conjecture.scalding.util._\nimport co"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/demo/IrisDataToMulticlassLabeledInstances.scala",
    "chars": 988,
    "preview": "package com.etsy.conjecture.demo\n\nimport com.twitter.scalding._\nimport com.etsy.conjecture.data._\n\nclass IrisDataToMulti"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/demo/LearnMulticlassClassifier.scala",
    "chars": 1762,
    "preview": "package com.etsy.conjecture.demo\n\nimport com.twitter.scalding._\nimport com.etsy.conjecture.scalding.evaluate.{ Multiclas"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/ALSJob.scala",
    "chars": 6835,
    "preview": "package com.etsy.conjecture.scalding\n\nimport cascading.pipe.Pipe\nimport cascading.pipe.joiner.InnerJoin\nimport com.twitt"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/FastKNN.scala",
    "chars": 9216,
    "preview": "package com.etsy.conjecture.scalding\n\nimport collection.mutable.PriorityQueue\nimport com.twitter.scalding._\nimport casca"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/LSH.scala",
    "chars": 5356,
    "preview": "package com.etsy.conjecture.scalding\n\nimport collection.mutable.PriorityQueue\n\nimport cascading.pipe.Pipe\nimport cascadi"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/NNMF.scala",
    "chars": 10437,
    "preview": "package com.etsy.conjecture.scalding\n\nimport org.apache.commons.math3.linear._\n\nimport com.etsy.scalding._\nimport com.tw"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/SVD.scala",
    "chars": 6057,
    "preview": "package com.etsy.conjecture.scalding\n\nimport org.apache.commons.math3.linear._\nimport cascading.pipe.Pipe\nimport cascadi"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/evaluate/GenericCrossValidator.scala",
    "chars": 3621,
    "preview": "package com.etsy.conjecture.scalding.evaluate\n\nimport com.twitter.scalding._\n\nimport cascading.pipe.Pipe\nimport cascadin"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/evaluate/GenericEvaluator.scala",
    "chars": 2266,
    "preview": "package com.etsy.conjecture.scalding.evaluate\n\nimport com.twitter.scalding._\nimport com.etsy.conjecture._\nimport com.ets"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/factorize/FactorizationTools.scala",
    "chars": 2448,
    "preview": "package com.etsy.conjecture.scalding.factorize\n\nimport cascading.pipe.Pipe\nimport org.apache.commons.math3.linear._\nimpo"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/AbstractModelTrainer.scala",
    "chars": 442,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.pipe.Pipe\n\nimport com.etsy.conjecture.data._\nimport com.ets"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/BinaryModelTrainer.scala",
    "chars": 6111,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.pipe.Pipe\nimport com.twitter.scalding._\nimport com.etsy.con"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/ClusteringModelTrainer.scala",
    "chars": 1810,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.pipe.Pipe\nimport com.twitter.scalding._\nimport com.etsy.con"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/LargeModelTrainer.scala",
    "chars": 5961,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.flow._\nimport cascading.operation._\nimport cascading.pipe._"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/ModelTrainerStrategy.scala",
    "chars": 3234,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.pipe.Pipe\n\nimport com.etsy.conjecture.data._\nimport com.ets"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/MulticlassModelTrainer.scala",
    "chars": 7365,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.pipe.Pipe\nimport com.twitter.scalding._\nimport com.etsy.con"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/RegressionModelTrainer.scala",
    "chars": 3750,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.pipe.Pipe\nimport com.twitter.scalding._\nimport com.etsy.con"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/train/SmallModelTrainer.scala",
    "chars": 3250,
    "preview": "package com.etsy.conjecture.scalding.train\n\nimport cascading.pipe.Pipe\n\nimport com.etsy.conjecture.data._\nimport com.ets"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/util/BaseGridSearcher.scala",
    "chars": 1760,
    "preview": "package com.etsy.conjecture.scalding.util\n\n\nimport scala.util.Random\nimport com.etsy.conjecture.scalding.util._\nimport c"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/util/DynamicOptions.scala",
    "chars": 3906,
    "preview": "package com.etsy.conjecture.scalding.util\n\nimport java.io.Serializable\nimport com.twitter.scalding.Args\nimport scala.col"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/scalding/util/HyperparameterSearcher.scala",
    "chars": 9100,
    "preview": "package com.etsy.conjecture.scalding.util\n\nimport java.io.Serializable\n\nimport cascading.pipe.Pipe\nimport com.etsy.conje"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/text/FeatureHelper.scala",
    "chars": 3716,
    "preview": "package com.etsy.conjecture.text\n\nimport com.etsy.conjecture.data.{ AbstractInstance, BinaryLabeledInstance, LabeledInst"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/text/Text.scala",
    "chars": 2914,
    "preview": "package com.etsy.conjecture.text\n\ncase class Text(val input: String) {\n\n    private implicit def text2str(txt: Text): St"
  },
  {
    "path": "src/main/scala/com/etsy/conjecture/text/TextSequence.scala",
    "chars": 12578,
    "preview": "package com.etsy.conjecture.text\n\nimport com.etsy.conjecture.data.{BinaryLabeledInstance,BinaryLabel,MulticlassLabel,Mul"
  },
  {
    "path": "src/main/scala/com/etsy/scalding/jobs/conjecture/AdHocClassifier.scala",
    "chars": 2082,
    "preview": "package com.etsy.scalding.jobs.conjecture\n\nimport com.twitter.scalding.{Args, Job, Mode, SequenceFile, Tsv}\nimport com.e"
  },
  {
    "path": "src/main/scala/com/etsy/scalding/jobs/conjecture/AdHocClusterer.scala",
    "chars": 14055,
    "preview": "package com.etsy.scalding.jobs.conjecture\n\nimport com.twitter.scalding.{Args, Job, Mode, SequenceFile, Tsv}\nimport com.e"
  },
  {
    "path": "src/main/scala/com/etsy/scalding/jobs/conjecture/AdHocMulticlassClassifier.scala",
    "chars": 2062,
    "preview": "package com.etsy.scalding.jobs.conjecture\n\nimport com.twitter.scalding.{Args, Job, Mode, SequenceFile, Tsv}\nimport com.e"
  },
  {
    "path": "src/main/scala/com/etsy/scalding/jobs/conjecture/AdHocPredictor.scala",
    "chars": 2259,
    "preview": "package com.etsy.scalding.jobs.conjecture\n\nimport com.twitter.scalding.{Args, Job, Mode, SequenceFile, Tsv}\nimport com.e"
  },
  {
    "path": "src/main/scala/com/etsy/scalding/jobs/conjecture/NNMFTest.scala",
    "chars": 1983,
    "preview": "package com.etsy.scalding.jobs.conjecture\n\nimport com.etsy.conjecture.scalding.NNMF\nimport com.twitter.scalding.{Args, J"
  },
  {
    "path": "src/test/java/com/etsy/conjecture/data/LazyVectorTest.java",
    "chars": 8526,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.io.ByteArrayInputStream;\nimport java.io.ByteArrayOutputStream;\nimport jav"
  },
  {
    "path": "src/test/java/com/etsy/conjecture/data/StringKeyedVectorTest.java",
    "chars": 5306,
    "preview": "package com.etsy.conjecture.data;\n\nimport java.io.ByteArrayInputStream;\nimport java.io.ByteArrayOutputStream;\nimport jav"
  },
  {
    "path": "src/test/java/com/etsy/conjecture/evaluation/TestReceiverOperatingCharacteristic.java",
    "chars": 853,
    "preview": "package com.etsy.conjecture.evaluation;\n\nimport static org.junit.Assert.assertEquals;\n\nimport org.junit.Test;\n\npublic cl"
  },
  {
    "path": "src/test/java/com/etsy/conjecture/model/UpdateableLinearModelTest.java",
    "chars": 5815,
    "preview": "package com.etsy.conjecture.model;\n\nimport static org.junit.Assert.assertEquals;\nimport static org.junit.Assert.assertTr"
  }
]

About this extraction

This page contains the full source code of the etsy/Conjecture GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 117 files (449.7 KB), approximately 119.2k tokens, and a symbol index with 966 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!