Copy disabled (too large)
Download .txt
Showing preview only (50,132K chars total). Download the full file to get everything.
Repository: parrt/animl
Branch: master
Commit: 9680ac83b778
Files: 89
Total size: 194.9 MB
Directory structure:
gitextract_7mrp8x7l/
├── .gitignore
├── LICENSE
├── README.md
├── data/
│ ├── cars.csv
│ └── titanic/
│ └── titanic.csv
├── developer-cert-of-origin.txt
├── dtreeviz/
│ ├── __init__.py
│ ├── ai_explanation.py
│ ├── classifiers.py
│ ├── colors.py
│ ├── compatibility.py
│ ├── interpretation.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── lightgbm_decision_tree.py
│ │ ├── shadow_decision_tree.py
│ │ ├── sklearn_decision_trees.py
│ │ ├── spark_decision_tree.py
│ │ ├── tensorflow_decision_tree.py
│ │ └── xgb_decision_tree.py
│ ├── trees.py
│ ├── utils.py
│ └── version.py
├── notebooks/
│ ├── classifier-boundary-animations.ipynb
│ ├── classifier-decision-boundaries.ipynb
│ ├── colors.ipynb
│ ├── dtreeviz_lightgbm_visualisations.ipynb
│ ├── dtreeviz_sklearn_AI_visualisations.ipynb
│ ├── dtreeviz_sklearn_pipeline_visualisations.ipynb
│ ├── dtreeviz_sklearn_visualisations.ipynb
│ ├── dtreeviz_spark_visualisations.ipynb
│ ├── dtreeviz_tensorflow_visualisations.ipynb
│ └── dtreeviz_xgboost_visualisations.ipynb
├── play.ipynb
├── releasing.txt
├── scripts/
│ └── github_release_notes.py
├── setup.cfg
├── setup.py
├── talk/
│ └── dtreeviz.pptx
└── testing/
├── __init__.py
├── animate_rtree_bivar_3D.py
├── bin/
│ ├── icons.sh
│ └── topng.sh
├── cancer.py
├── data/
│ ├── cars.csv
│ ├── forestfires.csv
│ ├── knowledge.csv
│ └── sweetrs.csv
├── gen_feature_space_samples.py
├── gen_samples.py
├── iris.py
├── issues_investigations.ipynb
├── paper_examples.py
├── play_ctree.py
├── play_ctree_bivar.py
├── play_lightgbm.py
├── play_rtree.py
├── play_rtree_bivar_3D.py
├── play_rtree_bivar_heatmap.py
├── play_spark.py
├── playground.ipynb
├── slides.ipynb
├── testlib/
│ ├── __init__.py
│ └── models/
│ ├── __init__.py
│ ├── conftest.py
│ ├── fixtures/
│ │ ├── dataset.csv
│ │ ├── dataset_lightgbm.csv
│ │ ├── dataset_spark_tf.csv
│ │ ├── lightgbm_model_classifier.txt
│ │ ├── sk_decision_tree_classifier.joblib
│ │ ├── spark_2_decision_tree_classifier.model/
│ │ │ ├── data/
│ │ │ │ ├── part-00000-d3b57c8e-2190-44de-a427-3f57f84c7b67-c000.snappy.parquet
│ │ │ │ └── part-00001-d3b57c8e-2190-44de-a427-3f57f84c7b67-c000.snappy.parquet
│ │ │ └── metadata/
│ │ │ └── part-00000
│ │ ├── spark_3_0_decision_tree_classifier.model/
│ │ │ ├── data/
│ │ │ │ ├── part-00000-65d1fe70-5c43-4fd5-b250-7020b561625a-c000.snappy.parquet
│ │ │ │ └── part-00001-65d1fe70-5c43-4fd5-b250-7020b561625a-c000.snappy.parquet
│ │ │ └── metadata/
│ │ │ └── part-00000
│ │ ├── xgb_model_classifier.joblib
│ │ └── xgb_model_regressor.joblib
│ ├── test_decision_tree_lightgbm_classifier.py
│ ├── test_decision_tree_spark_classifier.py
│ ├── test_decision_tree_tensorflow_classifier.py
│ ├── test_decision_tree_xgb_regressor.py
│ ├── test_decision_trees_sk_classifier.py
│ ├── test_decision_trees_sk_pipeline.py
│ └── test_decision_trees_xgb_classifier.py
├── testone.py
├── tf-catvars.py
├── tf_catvars2.py
├── tf_catvars3.py
└── tf_regr_catvars.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.idea/
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2021 Terence Parr
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# dtreeviz : Decision Tree Visualization
## Description
A python library for decision tree visualization and model interpretation. Decision trees are the fundamental building block of [gradient boosting machines](http://explained.ai/gradient-boosting/index.html) and [Random Forests](https://en.wikipedia.org/wiki/Random_forest)(tm), probably the two most popular machine learning models for structured data. Visualizing decision trees is a tremendous aid when learning how these models work and when interpreting models. The visualizations are inspired by an educational animation by [R2D3](http://www.r2d3.us/); [A visual introduction to machine learning](http://www.r2d3.us/visual-intro-to-machine-learning-part-1/). Please see [How to visualize decision trees](http://explained.ai/decision-tree-viz/index.html) for deeper discussion of our decision tree visualization library and the visual design decisions we made.
Currently dtreeviz supports: [scikit-learn](https://scikit-learn.org/stable), [XGBoost](https://xgboost.readthedocs.io/en/latest), [Spark MLlib](https://spark.apache.org/mllib/), [LightGBM](https://lightgbm.readthedocs.io/en/latest/), and [Tensorflow](https://www.tensorflow.org/decision_forests). See [Installation instructions](README.md#Installation).
### Authors
* [Terence Parr](https://explained.ai/), a tech lead at Google, and until 2022 was a professor of data science / computer science at Univ. of San Francisco, where he was founding director of the [University of San Francisco's MS in data science program](https://www.usfca.edu/arts-sciences/graduate-programs/data-science) in 2012.
* [Tudor Lapusan](https://www.linkedin.com/in/tudor-lapusan-5902593b/)
* [Prince Grover](https://www.linkedin.com/in/groverpr)
With major code and visualization clean up contributions done by [Matthew Epland](https://www.linkedin.com/in/matthew-epland) (@mepland).
## Sample Visualizations
### Tree visualizations
<table cellpadding="0" cellspacing="0">
<tr>
<td><img src="testing/samples/iris-TD-2.svg" width="250"></td>
<td><img src="testing/samples/boston-TD-2.svg" width="250"></td>
<td><img src="testing/samples/knowledge-TD-4-simple.svg" width="250"></td>
</tr>
</table>
### Prediction path explanations
<table cellpadding="0" cellspacing="0">
<tr>
<td><img src="testing/samples/breast_cancer-TD-3-X.svg" width="250"></td>
<td><img src="testing/samples/diabetes-LR-2-X.svg" width="300"></td>
<td><img src="testing/samples/knowledge-TD-15-X-simple.svg" width="250"></td>
</tr>
</table>
### Leaf information
<table cellpadding="0" cellspacing="0">
<tr>
<td><img src="testing/samples/titanic-leaf-regression.png" width="150"></td>
<td><img src="testing/samples/titanic-leaf-samples-by-class.png" width="250"></td>
</tr>
</table>
### Feature space exploration
#### Regression
<table cellpadding="0" cellspacing="0">
<tr>
<td><img src="testing/samples/cars-univar-2.svg" width="250"></td>
<td><img src="https://user-images.githubusercontent.com/178777/49104999-4edb0d80-f234-11e8-9010-73b7c0ba5fb9.png" width="250"></td>
<td><img src="https://user-images.githubusercontent.com/178777/49107627-08d57800-f23b-11e8-85a2-ab5894055092.png" width="250"></td>
</tr>
</table>
#### Classification
<table cellpadding="0" cellspacing="0">
<tr>
<td><img src="https://user-images.githubusercontent.com/178777/49105084-9497d600-f234-11e8-9097-56835558c1a6.png" width="250"></td>
<td><img src="https://user-images.githubusercontent.com/178777/49105085-9792c680-f234-11e8-8af5-bc2fde950ab1.png" width="250"></td>
</tr>
</table>
#### Classification boundaries
As a utility function, dtreeviz provides `dtreeviz.decision_boundaries()` that illustrates one and two-dimensional feature space for classifiers, including colors that represent probabilities, decision boundaries, and misclassified entities. This method is not limited to tree models, by the way, and should work with any model that answers method `predict_proba()`. That means any model from scikit-learn should work (but we also made it work with Keras models that define `predict()`). (As it does not work with trees specifically, the function does not use adaptors obtained from `dtreeviz.model()`.) See [classifier-decision-boundaries.ipynb](https://github.com/parrt/dtreeviz/tree/master/notebooks/classifier-decision-boundaries.ipynb).
<table cellpadding="0" cellspacing="0">
<tr>
<td><img src="https://user-images.githubusercontent.com/178777/113516364-b608db00-952e-11eb-91cf-efe2386622f1.png" width="250"><br><img src="https://user-images.githubusercontent.com/178777/113516379-d5076d00-952e-11eb-955e-1dd7c09f2f29.png" width="250"></td>
<td><img src="https://user-images.githubusercontent.com/178777/113516349-a12c4780-952e-11eb-86f3-0ae457eb500f.png" width="250"></td>
</tr>
</table>
Sometimes it's helpful to see animations that change some of the hyper parameters. If you look in notebook [classifier-boundary-animations.ipynb](https://github.com/parrt/dtreeviz/tree/master/notebooks/classifier-boundary-animations.ipynb), you will see code that generates animations such as the following (animated png files):
<table cellpadding="0" cellspacing="0">
<tr>
<td><img src="testing/samples/smiley-dtree-maxdepth.png" width="250"></td>
<td><img src="testing/samples/smiley-numtrees.png" width="250"></td>
</tr>
</table>
## Quick start
See [Installation instructions](README.md#Installation) then take a look at the specific [notebooks](https://github.com/parrt/dtreeviz/tree/master/notebooks) for the supported ML library you're using:
* [sklearn-based examples](notebooks/dtreeviz_sklearn_visualisations.ipynb) ([colab](https://colab.research.google.com/github/parrt/dtreeviz/blob/master/notebooks/dtreeviz_sklearn_visualisations.ipynb))
* [LightGBM-based examples](notebooks/dtreeviz_lightgbm_visualisations.ipynb) ([colab](https://colab.research.google.com/github/parrt/dtreeviz/blob/master/notebooks/dtreeviz_lightgbm_visualisations.ipynb))
* [Spark-based examples](notebooks/dtreeviz_spark_visualisations.ipynb) ([colab](https://colab.research.google.com/github/parrt/dtreeviz/blob/master/notebooks/dtreeviz_spark_visualisations.ipynb))
* [TensorFlow-based examples](notebooks/dtreeviz_tensorflow_visualisations.ipynb) ([colab](https://colab.research.google.com/github/parrt/dtreeviz/blob/master/notebooks/dtreeviz_tensorflow_visualisations.ipynb)) Also see blog at tensorflow.org [Visualizing TensorFlow Decision Forest Trees with dtreeviz](https://www.tensorflow.org/decision_forests/tutorials/dtreeviz_colab)
* [XGBoost-based examples](notebooks/dtreeviz_xgboost_visualisations.ipynb) ([colab](https://colab.research.google.com/github/parrt/dtreeviz/blob/master/notebooks/dtreeviz_xgboost_visualisations.ipynb))
* [Classifier decision boundaries for any scikit-learn model.ipynb](https://github.com/parrt/dtreeviz/tree/master/notebooks/classifier-decision-boundaries.ipynb) ([colab](https://colab.research.google.com/github/parrt/dtreeviz/blob/master/notebooks/classifier-decision-boundaries.ipynb))
* [Changing colors notebook](notebooks/colors.ipynb) ([colab](https://colab.research.google.com/github/parrt/dtreeviz/blob/master/notebooks/colors.ipynb))
* [AI-powered tree analysis (sklearn)](notebooks/dtreeviz_sklearn_AI_visualisations.ipynb) - Interactive chat and explanations using LLMs
To interoperate with these different libraries, dtreeviz uses an adaptor object, obtained from function `dtreeviz.model()`, to extract model information necessary for visualization. Given such an adaptor object, all of the dtreeviz functionality is available to you using the same programmer interface. The basic dtreeviz usage recipe is:
1. Import dtreeviz and your decision tree library
2. Acquire and load data into memory
3. Train a classifier or regressor model using your decision tree library
4. Obtain a dtreeviz adaptor model using<br>`viz_model = dtreeviz.model(your_trained_model,...)`
5. Call dtreeviz functions, such as<br>`viz_model.view()` or `viz_model.explain_prediction_path(sample_x)`
**Example**
Here's a complete example Python file that displays the following tree in a popup window:
<img src="testing/samples/iris-TD-4.svg" width="200">
```python
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
import dtreeviz
iris = load_iris()
X = iris.data
y = iris.target
clf = DecisionTreeClassifier(max_depth=4)
clf.fit(X, y)
viz_model = dtreeviz.model(clf,
X_train=X, y_train=y,
feature_names=iris.feature_names,
target_name='iris',
class_names=iris.target_names)
v = viz_model.view() # render as SVG into internal object
v.show() # pop up window
v.save("/tmp/iris.svg") # optionally save as svg
```
In a notebook, you can render inline without calling `show()`. Just call `view()`:
```python
viz_model.view() # in notebook, displays inline
```
### AI-Powered Tree Analysis
With AI integration enabled, you can ask ad hoc questions about your decision tree model using the `chat()` method. The AI has access to comprehensive knowledge about your tree structure, nodes, and training data, enabling it to answer questions about:
* **Tree structure**: Overall architecture, depth, node count, splitting criteria, and tree type (classification/regression)
* **Tree nodes**: Split conditions, feature usage, node statistics, sample distributions, and purity measures at internal nodes
* **Leaf nodes**: Predictions, confidence scores, sample counts, and class distributions
* **Training dataset**: Feature statistics, target distributions, and data characteristics within nodes or leaves
```python
# Enable AI chat when creating the model
viz_model = dtreeviz.model(tree_classifier,
X_train=dataset[features], y_train=dataset[target],
feature_names=features,
target_name=target, class_names=["perish", "survive"],
ai_chat=True,
ai_model="gpt-4.1-mini",
max_history_messages=10)
# Ask questions about your tree
viz_model.chat("Please give me a short summary of the tree structure?")
viz_model.chat("Which leaf nodes have the lowest prediction confidence?")
```
Additionally, when `ai_chat=True` is enabled, the main visualization methods (like `view()`) will automatically include an LLM-generated explanation alongside the visual output, providing both graphical and natural language interpretations of your decision tree.
Requires `pip install dtreeviz[ai]` and an OpenAI API key set as `OPENAI_API_KEY` environment variable. See the [AI-powered tree analysis notebook](notebooks/dtreeviz_sklearn_AI_visualisations.ipynb) for more examples.
## Installation
Install anaconda3 on your system, if not already done.
You might verify that you do not have conda-installed graphviz-related packages installed because dtreeviz needs the pip versions; you can remove them from conda space by doing:
```bash
conda uninstall python-graphviz
conda uninstall graphviz
```
To install (Python >=3.6 only), do this (from Anaconda Prompt on Windows!):
```bash
pip install dtreeviz # install dtreeviz for sklearn
pip install dtreeviz[xgboost] # install XGBoost related dependency
pip install dtreeviz[pyspark] # install pyspark related dependency
pip install dtreeviz[lightgbm] # install LightGBM related dependency
pip install dtreeviz[tensorflow_decision_forests] # install tensorflow_decision_forests related dependency
pip install dtreeviz[ai] # install AI chat/explanation features (requires OpenAI API key)
pip install dtreeviz[all] # install all related dependencies
```
This should also pull in the `graphviz` Python library (>=0.9), which we are using for platform specific stuff.
**Limitations.** Only svg files can be generated at this time, which reduces dependencies and dramatically simplifies install process.
Please email [Terence](mailto:parrt@antlr.org) with any helpful notes on making dtreeviz work (better) on other platforms. Thanks!
For your specific platform, please see the following subsections.
### Mac
Make sure to have the latest XCode installed and command-line tools installed. You can run `xcode-select --install` from the command-line to install those if XCode is already installed. You also have to sign the XCode license agreement, which you can do with `sudo xcodebuild -license` from command-line. The brew install shown next needs to build graphviz, so you need XCode set up properly.
You need the graphviz binary for `dot`. Make sure you have latest version (verified on 10.13, 10.14):
```bash
brew reinstall graphviz
```
Just to be sure, remove `dot` from any anaconda installation, for example:
```bash
rm ~/anaconda3/bin/dot
```
From command line, this command
```bash
dot -Tsvg
```
should work, in the sense that it just stares at you without giving an error. You can hit control-C to escape back to the shell. Make sure that you are using the right `dot` as installed by brew:
```bash
$ which dot
/usr/local/bin/dot
$ ls -l $(which dot)
lrwxr-xr-x 1 parrt wheel 33 May 26 11:04 /usr/local/bin/dot@ -> ../Cellar/graphviz/2.40.1/bin/dot
$
```
**Limitations.** Jupyter notebook has a bug where it does not show .svg files correctly, but Jupyter Lab has no problem.
### Linux (Ubuntu 18.04)
To get the `dot` binary do:
```bash
sudo apt install graphviz
```
**Limitations.** The `view()` method works to pop up a new window and images appear inline for jupyter notebook but not jupyter lab (It gets an error parsing the SVG XML.) The notebook images also have a font substitution from the Arial we use and so some text overlaps. Only .svg files can be generated on this platform.
### Windows 10
(Make sure to `pip install graphviz`, which is common to all platforms, and make sure to do this from Anaconda Prompt on Windows!)
[Download graphviz-2.38.msi](https://graphviz.gitlab.io/_pages/Download/Download_windows.html) and update your `Path` environment variable. Add `C:\Program Files (x86)\Graphviz2.38\bin` to User path and `C:\Program Files (x86)\Graphviz2.38\bin\dot.exe` to System Path. It's windows so you might need a reboot after updating that environment variable. You should see this from the Anaconda Prompt:
```
(base) C:\Users\Terence Parr>where dot
C:\Program Files (x86)\Graphviz2.38\bin\dot.exe
```
(Do not use `conda install -c conda-forge python-graphviz` as you get an old version of `graphviz` python library.)
Verify from the Anaconda Prompt that this works (capital `-V` not lowercase `-v`):
```
dot -V
```
If it doesn't work, you have a `Path` problem. I found the following test programs useful. The first one sees if Python can find `dot`:
```python
import os
import subprocess
proc = subprocess.Popen(['dot','-V'])
print( os.getenv('Path') )
```
The following version does the same thing except uses `graphviz` Python libraries backend support utilities, which is what we use in dtreeviz:
```python
import graphviz.backend as be
cmd = ["dot", "-V"]
stdout, stderr = be.run(cmd, capture_output=True, check=True, quiet=False)
print( stderr )
```
If you are having issues with run command you can try copying the following files from: https://github.com/xflr6/graphviz/tree/master/graphviz.
Place them in the AppData\Local\Continuum\anaconda3\Lib\site-packages\graphviz folder.
Clean out the __pycache__ directory too.
For graphviz windows install 8.0.5 and python interface v0.18+ :
```python
import graphviz.backend as be
cmd = ["dot", "-V"]
stdout = be.execute.run_check(cmd, capture_output=True, check=True, quiet=False)
print( stdout )
```
Jupyter Lab and Jupyter notebook both show the inline .svg images well.
### Verify graphviz installation
Try making text file `t.dot` with content `digraph T { A -> B }` (paste that into a text editor, for example) and then running this from the command line:
```
dot -Tsvg -o t.svg t.dot
```
That should give a simple `t.svg` file that opens properly. If you get errors from `dot`, it will not work from the dtreeviz python code. If it can't find `dot` then you didn't update your `PATH` environment variable or there is some other install issue with `graphviz`.
### Limitations
Finally, don't use IE to view .svg files. Use Edge as they look much better. I suspect that IE is displaying them as rasterized, not vector, images. Only .svg files can be generated on this platform.
## Install dtreeviz locally
Make sure to follow the install guidelines above.
In order to run tests, you need to install the library with the `[dev]` extra:
```bash
pip install dtreeviz[dev] # Install develop dependencies
```
To push the `dtreeviz` library to your local egg cache (force updates) during development, do this (from anaconda prompt on Windows):
```bash
python setup.py install -f
```
E.g., on Terence's box, it adds `/Users/parrt/anaconda3/lib/python3.6/site-packages/dtreeviz-2.3.2-py3.6.egg`.
## Feedback
We welcome info from users on how they use dtreeviz, what features they'd like, etc... via [email (to parrt)](mailto:parrt@antlr.org) or via an [issue](https://github.com/parrt/dtreeviz/issues).
## Useful Resources
* [How to visualize decision trees](http://explained.ai/decision-tree-viz/index.html)
* [How to explain gradient boosting](http://explained.ai/gradient-boosting/index.html)
* [The Mechanics of Machine Learning](https://mlbook.explained.ai/)
* [Animation by R2D3](http://www.r2d3.us/)
* [A visual introduction to machine learning](http://www.r2d3.us/visual-intro-to-machine-learning-part-1/)
* [fast.ai's Introduction to Machine Learning for Coders MOOC](https://course18.fast.ai/ml.html)
* Stef van den Elzen's [Interactive Construction, Analysis and
Visualization of Decision Trees](http://alexandria.tue.nl/extra1/afstversl/wsk-i/elzen2011.pdf)
* Some similar feature-space visualizations in [Towards an effective cooperation of the user and the computer for classification, SIGKDD 2000](https://github.com/EE2dev/publications/blob/master/cooperativeClassification.pdf)
* [Beautiful Decisions: Inside BigML’s Decision Trees](https://blog.bigml.com/2012/01/23/beautiful-decisions-inside-bigmls-decision-trees/)
* "SunBurst" approach to tree visualization: [An evaluation of space-filling information visualizations
for depicting hierarchical structures](https://www.cc.gatech.edu/~john.stasko/papers/ijhcs00.pdf)
## License
This project is licensed under the terms of the MIT license, see [LICENSE](LICENSE).
================================================
FILE: data/cars.csv
================================================
MPG,CYL,ENG,WGT
18,8,307,3504
15,8,350,3693
18,8,318,3436
16,8,304,3433
17,8,302,3449
15,8,429,4341
14,8,454,4354
14,8,440,4312
14,8,455,4425
15,8,390,3850
15,8,383,3563
14,8,340,3609
15,8,400,3761
14,8,455,3086
24,4,113,2372
22,6,198,2833
18,6,199,2774
21,6,200,2587
27,4,97,2130
26,4,97,1835
25,4,110,2672
24,4,107,2430
25,4,104,2375
26,4,121,2234
21,6,199,2648
10,8,360,4615
10,8,307,4376
11,8,318,4382
9,8,304,4732
27,4,97,2130
28,4,140,2264
25,4,113,2228
19,6,232,2634
16,6,225,3439
17,6,250,3329
19,6,250,3302
18,6,232,3288
14,8,350,4209
14,8,400,4464
14,8,351,4154
14,8,318,4096
12,8,383,4955
13,8,400,4746
13,8,400,5140
18,6,258,2962
22,4,140,2408
19,6,250,3282
18,6,250,3139
23,4,122,2220
28,4,116,2123
30,4,79,2074
30,4,88,2065
31,4,71,1773
35,4,72,1613
27,4,97,1834
26,4,91,1955
24,4,113,2278
25,4,97.5,2126
23,4,97,2254
20,4,140,2408
21,4,122,2226
13,8,350,4274
14,8,400,4385
15,8,318,4135
14,8,351,4129
17,8,304,3672
11,8,429,4633
13,8,350,4502
12,8,350,4456
13,8,400,4422
19,3,70,2330
15,8,304,3892
13,8,307,4098
13,8,302,4294
14,8,318,4077
18,4,121,2933
22,4,121,2511
21,4,120,2979
26,4,96,2189
22,4,122,2395
28,4,97,2288
23,4,120,2506
28,4,98,2164
27,4,97,2100
13,8,350,4100
14,8,304,3672
13,8,350,3988
14,8,302,4042
15,8,318,3777
12,8,429,4952
13,8,400,4464
13,8,351,4363
14,8,318,4237
13,8,440,4735
12,8,455,4951
13,8,360,3821
18,6,225,3121
16,6,250,3278
18,6,232,2945
18,6,250,3021
23,6,198,2904
26,4,97,1950
11,8,400,4997
12,8,400,4906
13,8,360,4654
12,8,350,4499
18,6,232,2789
20,4,97,2279
21,4,140,2401
22,4,108,2379
18,3,70,2124
19,4,122,2310
21,6,155,2472
26,4,98,2265
15,8,350,4082
16,8,400,4278
29,4,68,1867
24,4,116,2158
20,4,114,2582
19,4,121,2868
15,8,318,3399
24,4,121,2660
20,6,156,2807
11,8,350,3664
20,6,198,3102
19,6,232,2901
15,6,250,3336
31,4,79,1950
26,4,122,2451
32,4,71,1836
25,4,140,2542
16,6,250,3781
16,6,258,3632
18,6,225,3613
16,8,302,4141
13,8,350,4699
14,8,318,4457
14,8,302,4638
14,8,304,4257
29,4,98,2219
26,4,79,1963
26,4,97,2300
31,4,76,1649
32,4,83,2003
28,4,90,2125
24,4,90,2108
26,4,116,2246
24,4,120,2489
26,4,108,2391
31,4,79,2000
19,6,225,3264
18,6,250,3459
15,6,250,3432
15,6,250,3158
16,8,400,4668
15,8,350,4440
16,8,318,4498
14,8,351,4657
17,6,231,3907
16,6,250,3897
15,6,258,3730
18,6,225,3785
21,6,231,3039
20,8,262,3221
13,8,302,3169
29,4,97,2171
23,4,140,2639
20,6,232,2914
23,4,140,2592
24,4,134,2702
25,4,90,2223
24,4,119,2545
18,6,171,2984
29,4,90,1937
19,6,232,3211
23,4,115,2694
23,4,120,2957
22,4,121,2945
25,4,121,2671
33,4,91,1795
28,4,107,2464
25,4,116,2220
25,4,140,2572
26,4,98,2255
27,4,101,2202
17.5,8,305,4215
16,8,318,4190
15.5,8,304,3962
14.5,8,351,4215
22,6,225,3233
22,6,250,3353
24,6,200,3012
22.5,6,232,3085
29,4,85,2035
24.5,4,98,2164
29,4,90,1937
33,4,91,1795
20,6,225,3651
18,6,250,3574
18.5,6,250,3645
17.5,6,258,3193
29.5,4,97,1825
32,4,85,1990
28,4,97,2155
26.5,4,140,2565
20,4,130,3150
13,8,318,3940
19,4,120,3270
19,6,156,2930
16.5,6,168,3820
16.5,8,350,4380
13,8,350,4055
13,8,302,3870
13,8,318,3755
31.5,4,98,2045
30,4,111,2155
36,4,79,1825
25.5,4,122,2300
33.5,4,85,1945
17.5,8,305,3880
17,8,260,4060
15.5,8,318,4140
15,8,302,4295
17.5,6,250,3520
20.5,6,231,3425
19,6,225,3630
18.5,6,250,3525
16,8,400,4220
15.5,8,350,4165
15.5,8,400,4325
16,8,351,4335
29,4,97,1940
24.5,4,151,2740
26,4,97,2265
25.5,4,140,2755
30.5,4,98,2051
33.5,4,98,2075
30,4,97,1985
30.5,4,97,2190
22,6,146,2815
21.5,4,121,2600
21.5,3,80,2720
43.099998,4,90,1985
36.099998,4,98,1800
32.799999,4,78,1985
39.400002,4,85,2070
36.099998,4,91,1800
19.9,8,260,3365
19.4,8,318,3735
20.200001,8,302,3570
19.200001,6,231,3535
20.5,6,200,3155
20.200001,6,200,2965
25.1,4,140,2720
20.5,6,225,3430
19.4,6,232,3210
20.6,6,231,3380
20.799999,6,200,3070
18.6,6,225,3620
18.1,6,258,3410
19.200001,8,305,3425
17.700001,6,231,3445
18.1,8,302,3205
17.5,8,318,4080
30,4,98,2155
27.5,4,134,2560
27.200001,4,119,2300
30.9,4,105,2230
21.1,4,134,2515
23.200001,4,156,2745
23.799999,4,151,2855
23.9,4,119,2405
20.299999,5,131,2830
17,6,163,3140
21.6,4,121,2795
16.200001,6,163,3410
31.5,4,89,1990
29.5,4,98,2135
21.5,6,231,3245
19.799999,6,200,2990
22.299999,4,140,2890
20.200001,6,232,3265
20.6,6,225,3360
17,8,305,3840
17.6,8,302,3725
16.5,8,351,3955
18.200001,8,318,3830
16.9,8,350,4360
15.5,8,351,4054
19.200001,8,267,3605
18.5,8,360,3940
31.9,4,89,1925
34.099998,4,86,1975
35.700001,4,98,1915
27.4,4,121,2670
25.4,5,183,3530
23,8,350,3900
27.200001,4,141,3190
23.9,8,260,3420
34.200001,4,105,2200
34.5,4,105,2150
31.799999,4,85,2020
37.299999,4,91,2130
28.4,4,151,2670
28.799999,6,173,2595
26.799999,6,173,2700
33.5,4,151,2556
41.5,4,98,2144
38.099998,4,89,1968
32.099998,4,98,2120
37.200001,4,86,2019
28,4,151,2678
26.4,4,140,2870
24.299999,4,151,3003
19.1,6,225,3381
34.299999,4,97,2188
29.799999,4,134,2711
31.299999,4,120,2542
37,4,119,2434
32.200001,4,108,2265
46.599998,4,86,2110
27.9,4,156,2800
40.799999,4,85,2110
44.299999,4,90,2085
43.400002,4,90,2335
36.400002,5,121,2950
30,4,146,3250
44.599998,4,91,1850
33.799999,4,97,2145
29.799999,4,89,1845
32.700001,6,168,2910
23.700001,3,70,2420
35,4,122,2500
32.400002,4,107,2290
27.200001,4,135,2490
26.6,4,151,2635
25.799999,4,156,2620
23.5,6,173,2725
30,4,135,2385
39.099998,4,79,1755
39,4,86,1875
35.099998,4,81,1760
32.299999,4,97,2065
37,4,85,1975
37.700001,4,89,2050
34.099998,4,91,1985
34.700001,4,105,2215
34.400002,4,98,2045
29.9,4,98,2380
33,4,105,2190
33.700001,4,107,2210
32.400002,4,108,2350
32.900002,4,119,2615
31.6,4,120,2635
28.1,4,141,3230
30.700001,6,145,3160
25.4,6,168,2900
24.200001,6,146,2930
22.4,6,231,3415
26.6,8,350,3725
20.200001,6,200,3060
17.6,6,225,3465
28,4,112,2605
27,4,112,2640
34,4,112,2395
31,4,112,2575
29,4,135,2525
27,4,151,2735
24,4,140,2865
36,4,105,1980
37,4,91,2025
31,4,91,1970
38,4,105,2125
36,4,98,2125
36,4,120,2160
36,4,107,2205
34,4,108,2245
38,4,91,1965
32,4,91,1965
38,4,91,1995
25,6,181,2945
38,6,262,3015
26,4,156,2585
22,6,232,2835
32,4,144,2665
36,4,135,2370
27,4,151,2950
27,4,140,2790
44,4,97,2130
32,4,135,2295
28,4,120,2625
31,4,119,2720
================================================
FILE: data/titanic/titanic.csv
================================================
PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S
6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S
8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C
11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S
12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S
13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S
14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S
15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S
16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S
17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q
18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S
19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S
20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C
21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S
22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S
23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,,Q
24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,A6,S
25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S
26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S
27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C
28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S
29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q
30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S
31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,,C
32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C
33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q
34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S
35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C
36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S
37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C
38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S
39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S
40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C
41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S
42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S
43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C
44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C
45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q
46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S
47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q
48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q
49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C
50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S
51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S
52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,,S
53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C
54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S
55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C
56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S
57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S
58,0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,,C
59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S
60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S
61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C
62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28,
63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S
64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S
65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C
66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C
67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S
68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S
69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S
70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S
71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S
72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S
73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S
74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,,C
75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S
76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S
77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S
78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S
79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,,S
80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,,S
81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S
82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S
83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q
84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S
85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S
86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S
87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,,S
88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S
89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S
90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S
91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S
92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S
93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S
94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S
95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S
96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S
97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C
98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C
99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S
100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S
101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S
102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S
103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S
104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S
105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S
106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S
107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S
108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S
109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S
110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q
111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,C110,S
112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C
113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S
114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S
115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,,C
116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S
117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q
118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S
119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C
120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S
121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S
122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S
123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C
124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S
125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S
126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C
127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q
128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S
129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C
130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S
131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C
132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,,S
133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S
134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S
135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S
136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C
137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S
138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S
139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S
140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C
141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C
142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,,S
143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S
144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q
145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S
146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S
147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S
148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S
149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S
150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S
151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S
152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S
153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S
154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S
155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S
156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C
157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q
158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S
159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S
160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S
161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S
162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S
163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S
164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S
165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S
166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S
167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,E33,S
168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S
169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S
170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S
171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,B19,S
172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q
173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S
174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S
175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,A7,C
176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S
177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,,S
178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C
179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S
180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S
181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S
182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C
183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S
184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S
185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S
186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S
187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q
188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S
189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q
190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S
191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S
192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S
193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S
194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,F2,S
195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C
196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C
197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,,Q
198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S
199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q
200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S
201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S
202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S
203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S
204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C
205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S
206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S
207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S
208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C
209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q
210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C
211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S
212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S
213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S
214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S
215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q
216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C
217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S
218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S
219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C
220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S
221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S
222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S
223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S
224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S
225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S
226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S
227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,,S
228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S
229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S
230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S
231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S
232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S
233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S
234,1,3,"Asplund, Miss. Lillian Gertrud",female,5,4,2,347077,31.3875,,S
235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S
236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S
237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S
238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S
239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S
240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S
241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C
242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q
243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S
244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S
245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C
246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q
247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S
248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,,S
249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S
250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S
251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S
252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S
253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S
254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S
255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S
256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C
257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C
258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S
259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C
260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S
261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q
262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S
263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,E67,S
264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,B94,S
265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q
266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S
267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S
268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S
269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S
270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S
271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S
272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S
273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S
274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C
275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q
276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S
277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S
278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S
279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q
280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S
281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,,Q
282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S
283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S
284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,,S
285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,A19,S
286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C
287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S
288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S
289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S
290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q
291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S
292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C
293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C
294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S
295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S
296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,,C
297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C
298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S
299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S
300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C
301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q
302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q
303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S
304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q
305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S
306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S
307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C
308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C
309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C
310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C
311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C
312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C
313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S
314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S
315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S
316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S
317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S
318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S
319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S
320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C
321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S
322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S
323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q
324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S
325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S
326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C
327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,,S
328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S
329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S
330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C
331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q
332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S
333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S
334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S
335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S
336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S
337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S
338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C
339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S
340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,T,S
341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S
342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S
343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S
344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S
345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S
346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S
347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S
348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S
349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S
350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S
351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S
352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S
353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,,C
354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,,S
355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C
356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S
357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S
358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S
359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q
360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q
361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S
362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C
363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C
364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S
365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q
366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S
367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C
368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C
369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q
370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C
371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C
372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S
373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S
374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C
375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S
376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C
377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,,S
378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C
379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C
380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,,S
381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C
382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C
383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S
384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S
385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S
386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,,S
387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S
388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S
389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q
390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C
391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S
392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S
393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S
394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C
395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S
396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S
397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S
398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,,S
399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S
400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S
401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S
402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S
403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S
404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S
405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S
406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S
407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,,S
408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S
409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S
410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S
411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S
412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q
413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q
414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S
415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S
416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S
417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S
418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S
419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S
420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S
421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C
422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q
423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S
424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S
425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S
426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S
427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S
428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S
429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q
430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S
431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S
432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S
433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S
434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S
435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,E44,S
436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S
437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S
438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S
439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S
440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S
441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S
442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S
443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S
444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S
445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S
446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S
447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S
448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S
449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C
450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S
451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S
452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S
453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C111,C
454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C
455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S
456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C
457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S
458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S
459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,,S
460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q
461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S
462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S
463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S
464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,,S
465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S
466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S
467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S
468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S
469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q
470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C
471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S
472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S
473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S
474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C
475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S
476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S
477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,,S
478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S
479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S
480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S
481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S
482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S
483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,,S
484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S
485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C
486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S
487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S
488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C
489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,,S
490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S
491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S
492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S
493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,C30,S
494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C
495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S
496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C
497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C
498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S
499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S
500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S
501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S
502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q
503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q
504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S
505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S
506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C
507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S
508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S
509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S
510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S
511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,,Q
512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S
513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,E25,S
514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C
515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S
516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S
517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S
518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q
519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S
520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S
521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S
522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,,S
523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C
524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C
525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C
526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q
527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S
528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S
529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S
530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S
531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S
532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C
533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C
534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C
535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S
536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S
537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,B38,S
538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,,C
539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S
540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C
541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S
542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S
543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S
544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S
545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C
546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S
547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S
548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C
549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S
550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S
551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C
552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,,S
553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q
554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C
555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S
556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S
557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C
558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C
559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S
560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S
561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q
562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S
563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S
564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S
565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S
566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,,S
567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S
568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S
569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C
570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S
571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S
572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S
573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S
574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q
575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S
576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S
577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S
578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S
579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C
580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S
581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,,S
582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C
583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S
584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C
585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C
586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S
587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S
588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,B41,C
589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S
590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S
591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,,S
592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C
593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,,S
594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q
595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S
596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S
597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,,S
598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S
599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C
600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C
601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S
602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S
603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S
604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S
605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C
606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S
607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S
608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S
609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C
610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S
611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S
612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S
613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q
614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q
615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S
616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S
617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S
618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S
619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,F4,S
620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S
621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C
622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S
623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C
624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S
625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S
626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,D50,S
627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q
628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S
629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,,S
630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q
631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S
632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S
633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C
634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S
635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S
636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S
637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S
638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S
639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S
640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S
641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S
642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C
643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,,S
644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S
645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C
646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C
647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,,S
648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C
649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S
650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S
651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S
652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S
653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S
654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q
655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q
656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S
657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S
658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q
659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S
660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C
661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,,S
662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C
663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S
664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S
665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S
666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S
667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S
668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S
669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,,S
670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S
671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S
672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S
673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S
674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,,S
675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S
676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S
677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S
678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S
679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S
680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C
681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q
682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C
683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S
684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S
685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,,S
686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C
687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S
688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S
689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S
690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S
691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,B20,S
692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C
693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S
694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C
695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,,S
696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S
697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S
698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q
699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C
700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S
701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C
702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S
703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C
704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q
705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S
706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S
707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S
708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S
709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S
710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C
711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C
712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S
713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,C126,S
714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S
715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S
716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S
717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C
718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S
719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q
720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S
721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,,S
722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S
723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S
724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S
725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,E8,S
726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S
727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S
728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q
729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S
730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S
731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S
732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C
733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S
734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S
735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S
736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S
737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S
738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C
739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S
740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S
741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S
742,0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,C46,S
743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C
744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S
745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,,S
746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S
747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S
748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S
749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S
750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q
751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S
752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,E121,S
753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S
754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,,S
755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,,S
756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S
757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S
758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S
759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S
760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S
761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S
762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S
763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C
764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S
765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S
766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S
767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C
768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q
769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q
770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S
771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,,S
772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S
773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S
774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C
775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S
776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S
777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q
778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S
779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q
780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S
781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,,C
782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S
783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S
784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S
785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S
786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S
787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S
788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q
789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S
790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C
791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q
792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S
793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S
794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C
795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S
796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S
797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S
798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,,S
799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C
800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S
801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S
802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S
803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S
804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C
805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S
806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S
807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S
808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,,S
809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S
810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S
811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S
812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S
813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,,S
814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S
815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S
816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S
817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S
818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C
819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S
820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S
821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S
822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S
823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S
824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S
825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S
826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q
827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S
828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C
829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q
830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28,
831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C
832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S
833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C
834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S
835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,,S
836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C
837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S
838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S
839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S
840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C
841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S
842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S
843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C
844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C
845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,,S
846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S
847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S
848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C
849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S
850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C
851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S
852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S
853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C
854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S
855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S
856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S
857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S
858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,E17,S
859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C
860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S
862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S
863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S
864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S
865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S
866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S
867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C
868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S
869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S
870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S
871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S
872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S
873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S
874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S
875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C
876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C
877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S
878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S
879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S
880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C
881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S
882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S
883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S
884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S
885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S
886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,,Q
887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S
888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,B42,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C
891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,,Q
================================================
FILE: developer-cert-of-origin.txt
================================================
dtreeviz uses the Linux Foundation's Developer
Certificate of Origin, DCO, version 1.1. See either
https://developercertificate.org/ or the text below.
Each commit requires a "signature", which is as simple as
using `-s` (not `-S`) with the git commit command:
git commit -s -m 'This is my commit message'
Github's pull request process enforces the sig and gives
instructions on how to fix any commits that lack the sig.
See https://github.com/apps/dco for more info.
----- https://developercertificate.org/ ------
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
================================================
FILE: dtreeviz/__init__.py
================================================
from .version import __version__
# NEW API
# import dtreeviz
# call m = dtreeviz.model(...) then m.view() etc...
from dtreeviz.utils import DTreeVizRender
from dtreeviz.trees import DTreeVizAPI, model
# OLD API
from dtreeviz.compatibility import rtreeviz_univar, \
rtreeviz_bivar_heatmap, \
rtreeviz_bivar_3D, \
ctreeviz_univar, \
ctreeviz_bivar, \
dtreeviz, \
viz_leaf_samples, \
viz_leaf_criterion, \
ctreeviz_leaf_samples , \
viz_leaf_target, \
describe_node_sample, \
explain_prediction_path
from dtreeviz.classifiers import decision_boundaries
================================================
FILE: dtreeviz/ai_explanation.py
================================================
import contextlib
import datetime
import json
import os

import numpy as np
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import SystemMessage, HumanMessage

from dtreeviz.models.shadow_decision_tree import ShadowDecTree
from dtreeviz.models.sklearn_decision_trees import ShadowSKDTree
DEFAULT_LLM_MODEL = "gpt-4.1-mini"
# Store for chat message histories (session-based)
_chat_history_store = {}
leaf_template_string = """You are an expert in decision tree models. You have very good knowledge about tree structure interpretation.
You should give advices how the decision tree could be improved based on its current structure.
I have the leaf nodes of a decision tree in a json format, delimited by '''.
I would like you to tell me about the following:
{leaf_related_questions}
json format: ```{leaf_json_format}```
At the end of your response, simple output the following text " Ask me more by typing viz_model.question('Your question')"
"""
node_stats_template_string = """
Below I will provide you, in json format, some basic statistics for numeric and string columns in a json format delimited by '''.
Those statistics are for the node samples and for the training set samples.
I will also provide you the distribution of class labels for that specific node as follow : {target_stats}
Start with a very short description of the node class labels.
After that, please make a short analysis only for the features which you consider have the most contribution on the node prediction,
based on the stats from node vs training samples. Please take in consideration all the stats provided, like count, mean, std (stadard deviation), min, max, 25% (percentile 25), 50% (percentile 50) and 75% (percentile 75)
At the end, make a summary of the all feature stats, with their implications in the node label stats.
node sample statistics : '''{node_stats_json_format}'''
training sample statistics : '''{training_stats_json_format}'''
"""
tree_stats_template_string = """
Bellow I will provide you, in json format, more information about the tree structure, leaf and internal nodes stats and training set statistics, all delimited by '''.
tree structure information: '''{tree_structure_knowledge}'''
leaf nodes information: '''{leaf_nodes_knowledge}'''
internal nodes information: '''{internal_nodes_knowledge}'''
training set information: '''{training_set_knowledge}'''
"""
# Pre-built LangChain prompt templates; placeholder values are supplied at
# invocation time via .format()/.invoke().
leaf_prompt_template = ChatPromptTemplate.from_template(leaf_template_string)
node_stats_template = ChatPromptTemplate.from_template(node_stats_template_string)
tree_stats_template = ChatPromptTemplate.from_template(tree_stats_template_string)
def _convert_to_json_serializable(obj):
"""Convert NumPy types and other non-JSON-serializable objects to native Python types.
Compatible with both NumPy 1.x and 2.x.
"""
# Handle NumPy integers (compatible with NumPy 2.0)
# np.integer is the abstract base class that works in both NumPy 1.x and 2.x
if isinstance(obj, np.integer):
return int(obj)
# Handle NumPy floating point numbers (compatible with NumPy 2.0)
# np.floating is the abstract base class that works in both NumPy 1.x and 2.x
elif isinstance(obj, np.floating):
return float(obj)
# Handle NumPy arrays
elif isinstance(obj, np.ndarray):
return obj.tolist()
# Handle NumPy booleans
elif isinstance(obj, np.bool_):
return bool(obj)
# Handle dictionaries recursively
elif isinstance(obj, dict):
return {key: _convert_to_json_serializable(value) for key, value in obj.items()}
# Handle lists and tuples recursively
elif isinstance(obj, (list, tuple)):
return [_convert_to_json_serializable(item) for item in obj]
# Handle pandas NaN values
elif pd.isna(obj):
return None
else:
return obj
def get_completion(prompt, model=None):
    """Send a single-message prompt to the LLM and return its text reply.

    Args:
        prompt: The prompt text to send to the LLM.
        model: OpenAI model name; falls back to DEFAULT_LLM_MODEL when None.
    """
    if model is None:
        model = DEFAULT_LLM_MODEL
    llm = ChatOpenAI(temperature=0, model=model)
    reply = llm.invoke([HumanMessage(content=prompt)])
    return reply.content
def _get_library(tree: ShadowDecTree):
    """Return the name of the ML library backing this shadow tree, or None if unsupported."""
    return "Scikit-Learn" if isinstance(tree, ShadowSKDTree) else None
def _get_tree_structure_knowledge(tree: ShadowDecTree):
tree_structure_knowledge = {}
tree_structure_knowledge["tree type"] = "classification" if tree.is_classifier() else "regression"
try:
tree_structure_knowledge["criterion"] = tree.criterion()
except Exception as e:
pass
try:
tree_structure_knowledge["tree max depth"] = tree.get_max_depth()
except Exception as e:
pass
tree_structure_knowledge["number of nodes"] = tree.nnodes()
return tree_structure_knowledge
def _get_training_set_knowledge(tree: ShadowDecTree):
    """Summarize the training data: size, feature list, per-feature stats,
    target name and class distribution (as JSON strings where appropriate)."""
    X_df = pd.DataFrame(tree.X_train, columns=tree.feature_names).convert_dtypes()
    y_series = pd.Series(tree.y_train)
    return {
        "training set size": X_df.shape[0],
        "feature list": tree.feature_names,
        "feature list size": len(tree.feature_names),
        "feature summary stats": X_df.describe(include='all').to_json(),
        "target class": tree.target_name,
        "target class number": tree.nclasses(),
        "target class distribution": y_series.value_counts().to_json(),
    }
def _get_leaf_nodes_knowledge(tree: ShadowDecTree):
    """Build a per-leaf summary dict keyed by 'leaf node id <id>'.

    Classifier leaves get per-class sample counts and a prediction confidence;
    regressor leaves get mean/std/min/max of the target values in the leaf.
    """
    knowledge = {}
    for leaf in tree.leaves:
        info = {
            "data samples": leaf.nsamples(),
            "prediction": leaf.prediction(),
            "leaf level": leaf.level,
            "node criterion": tree.criterion(),
        }
        if tree.is_classifier():
            counts = tree.get_node_nsamples_by_class(leaf.id)
            per_class = ", ".join(
                f"class label {i} contains {int(n)} samples"
                for i, n in enumerate(counts)
            )
            total = sum(counts) if counts is not None else 0
            # Confidence = fraction of leaf samples belonging to the majority class.
            confidence = None
            if counts is not None and total:
                confidence = round(max(counts) / total, 2)
            info["prediction class"] = leaf.prediction_name()
            info["prediction confidence"] = confidence
            info["leaf sample counts"] = per_class
        else:
            idx = leaf.samples()
            if len(idx):
                y_leaf = tree.y_train[idx]
                info["prediction mean"] = round(float(np.mean(y_leaf)), 4)
                info["prediction std"] = round(float(np.std(y_leaf)), 4)
                info["prediction min"] = round(float(np.min(y_leaf)), 4)
                info["prediction max"] = round(float(np.max(y_leaf)), 4)
        knowledge[f"leaf node id {leaf.id}"] = info
    return knowledge
def _get_internal_nodes_knowledge(tree: ShadowDecTree):
    """Build a per-internal-node summary dict keyed by 'internal node id <id>'."""
    knowledge = {}
    for node in tree.internal:
        info = {
            "data samples": node.nsamples(),
            "categorical split": node.is_categorical_split(),
            "split threshold": node.split(),
            "split feature": node.feature_name(),
            "node purity": node.criterion(),
            "node level": node.level,
            "node criterion": tree.criterion(),
        }
        if tree.is_classifier():
            # Key name "leaf sample counts" kept unchanged for prompt compatibility.
            info["leaf sample counts"] = ", ".join(
                f"class label {i} contains {int(n)} samples"
                for i, n in enumerate(tree.get_node_nsamples_by_class(node.id))
            )
        else:
            idx = node.samples()
            if len(idx):
                y_node = tree.y_train[idx]
                info["node target mean"] = round(float(np.mean(y_node)), 4)
                info["node target std"] = round(float(np.std(y_node)), 4)
                info["node target min"] = round(float(np.min(y_node)), 4)
                info["node target max"] = round(float(np.max(y_node)), 4)
        knowledge[f"internal node id {node.id}"] = info
    return knowledge
def _get_session_history(session_id: str, max_messages: int = None) -> InMemoryChatMessageHistory:
    """Fetch (or lazily create) the chat history for a session, trimming if needed.

    Args:
        session_id: Unique identifier for the conversation session.
        max_messages: Cap on the number of non-system messages kept; None means
            unlimited. When exceeded, the oldest conversation messages are
            dropped while system messages are always preserved.
    """
    if session_id not in _chat_history_store:
        _chat_history_store[session_id] = InMemoryChatMessageHistory()
    history = _chat_history_store[session_id]
    if max_messages is not None and max_messages > 0:
        from langchain_core.messages import SystemMessage
        all_msgs = history.messages
        # System messages (typically first) are kept regardless of the cap.
        system_msgs = [m for m in all_msgs if isinstance(m, SystemMessage)]
        convo_msgs = [m for m in all_msgs if not isinstance(m, SystemMessage)]
        if len(convo_msgs) > max_messages:
            # Rebuild: system messages first, then only the newest turns.
            history.clear()
            for m in system_msgs + convo_msgs[-max_messages:]:
                history.add_message(m)
    return history
def setup_chat(tree: ShadowDecTree, session_id: str = "default", model: str = None, max_history_messages: int = 20):
    """Setup a chat conversation with memory using LangChain Core.

    Builds a system prompt containing the full tree context (structure, leaf and
    internal node summaries, training-set stats, all JSON-serialized) and wires
    it into a RunnableWithMessageHistory chain backed by the module-level
    in-memory history store.

    Args:
        tree: The shadow decision tree to analyze
        session_id: Unique identifier for the conversation session
        model: OpenAI model to use (e.g., "gpt-4o", "gpt-4o-mini", "gpt-3.5-turbo").
            If None, defaults to DEFAULT_LLM_MODEL.
        max_history_messages: Maximum number of conversation messages to keep in history
            (excluding system message). Default is 20. Set to None for unlimited.
            Old messages are automatically trimmed when limit is exceeded.

    Returns:
        (conversation, config) tuple: the RunnableWithMessageHistory chain and
        the config dict carrying the session id for invoke()/stream() calls.

    Raises:
        ValueError: if the tree's backing library is not Scikit-Learn.
    """
    # Reset chat history store to start fresh for each setup
    global _chat_history_store
    _chat_history_store = {}
    # Use provided model or default
    if model is None:
        model = DEFAULT_LLM_MODEL
    ml_library = _get_library(tree)
    if ml_library is None:
        raise ValueError(
            "AI chat setup aborted: this tree's library is not currently supported. Only Scikit-Learn is supported for now."
        )
    # Gather all tree knowledge that goes into the system prompt
    leaf_nodes_knowledge = _get_leaf_nodes_knowledge(tree)
    internal_nodes_knowledge = _get_internal_nodes_knowledge(tree)
    training_set_knowledge = _get_training_set_knowledge(tree)
    tree_structure_knowledge = _get_tree_structure_knowledge(tree)
    # Format the tree statistics message
    # Convert NumPy types to native Python types for JSON serialization
    tree_stats_content = tree_stats_template.format_messages(
        tree_structure_knowledge=json.dumps(_convert_to_json_serializable(tree_structure_knowledge), indent=2),
        leaf_nodes_knowledge=json.dumps(_convert_to_json_serializable(leaf_nodes_knowledge), indent=2),
        internal_nodes_knowledge=json.dumps(_convert_to_json_serializable(internal_nodes_knowledge), indent=2),
        training_set_knowledge=json.dumps(_convert_to_json_serializable(training_set_knowledge), indent=2)
    )[0].content
    # Create comprehensive system prompt with all tree context
    # Use format() instead of f-string to avoid template parsing issues
    system_prompt_template = """You are an AI assistant specialized in Machine Learning, especially in decision tree structure interpretation.
You are not just talkative, you thrive on providing in-depth details and insights related to provided decision tree structure.
Always be helpful, detailed, and insightful when answering questions about decision trees.
Respond directly and naturally, as if you have direct knowledge of the tree. Avoid phrases like "based on the provided information", "from the provided data", "according to the information", or similar references. Simply state facts and insights directly.
The machine learning library used for this decision tree is: {ml_library}
Below is detailed information about this decision tree in JSON format:
{tree_stats_content}
Use this information to answer questions about the decision tree structure, nodes, leaves, training data, and any other aspects of the model."""
    # Format the system prompt with actual values
    # Escape curly braces in JSON so ChatPromptTemplate treats them as literals
    # NOTE(review): the prompt is later wrapped in a literal SystemMessage, which
    # ChatPromptTemplate does not re-format; the doubled braces may therefore
    # reach the LLM verbatim — confirm against langchain behavior.
    escaped_tree_stats = tree_stats_content.replace("{", "{{").replace("}", "}}")
    system_prompt = system_prompt_template.format(
        ml_library=ml_library,
        tree_stats_content=escaped_tree_stats
    )
    # Create the prompt template with system message
    # Since system_prompt is already a fully formatted string, we create it as a literal
    prompt = ChatPromptTemplate.from_messages([
        SystemMessage(content=system_prompt),
        ("placeholder", "{messages}")
    ])
    # Create the LLM
    # Note: streaming is controlled at the chat() method level, not here
    # We keep streaming=False here so invoke() works normally
    chat = ChatOpenAI(temperature=0.0, model=model)
    # Create the chain with message history
    # Use a closure to pass max_history_messages to _get_session_history
    def get_session_history_with_limit(session_id: str) -> InMemoryChatMessageHistory:
        return _get_session_history(session_id, max_messages=max_history_messages)
    chain = prompt | chat
    conversation = RunnableWithMessageHistory(
        chain,
        get_session_history_with_limit,
        input_messages_key="messages"
    )
    # Initialize the conversation history with context
    config = {"configurable": {"session_id": session_id}}
    return conversation, config
# TODO: apply function calling?
def build_node_stats_prompt(shadow_tree: ShadowDecTree, node_id: int) -> str:
    """Build the LLM prompt comparing one node's feature stats to the training set."""
    node_samples = shadow_tree.get_node_samples()
    features_df = pd.DataFrame(shadow_tree.X_train, columns=shadow_tree.feature_names).convert_dtypes()
    training_stats = features_df.describe(include='all').to_json()
    node_stats = features_df.iloc[node_samples[node_id]].describe(include='all').to_json()
    target_values = shadow_tree.y_train[node_samples[node_id]]
    if shadow_tree.is_classifier():
        # Describe the class-label distribution within this node.
        labels, counts = np.unique(target_values, return_counts=True)
        parts = []
        for label, count in zip(labels, counts):
            name = shadow_tree.class_names[label] if shadow_tree.class_names is not None else label
            parts.append(f"Target class {name} has {count} samples. ")
        target_string = "".join(parts)
    else:
        target_string = (
            f"The regression target statistics for this node are: "
            f"mean={float(np.mean(target_values)):.4f}, "
            f"std={float(np.std(target_values)):.4f}, "
            f"min={float(np.min(target_values)):.4f}, "
            f"max={float(np.max(target_values)):.4f}."
        )
    messages = node_stats_template.format_messages(
        node_stats_json_format=node_stats,
        target_stats=target_string,
        training_stats_json_format=training_stats
    )
    return messages[0].content
================================================
FILE: dtreeviz/classifiers.py
================================================
from typing import Tuple
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import ImageColor
from colour import Color
from matplotlib import patches as patches
from matplotlib.collections import PatchCollection
from dtreeviz import utils
from dtreeviz.colors import adjust_colors
from dtreeviz.utils import add_classifier_legend, _format_axes
def decision_boundaries(model, X: np.ndarray, y: np.ndarray,
                        ntiles=50, tile_fraction=.9,
                        binary_threshold=0.5,
                        show=['instances', 'boundaries', 'probabilities', 'misclassified', 'legend'],
                        feature_names=None, target_name=None, class_names=None,
                        markers=None,
                        boundary_marker='o', boundary_markersize=.8,
                        fontsize=9, fontname="Arial",
                        dot_w=25,
                        yshift=.08,
                        sigma=.013,
                        colors: dict = None,
                        ranges: Tuple = None,
                        figsize: Tuple = None,
                        ax=None) -> None:
    """
    Two-variable case:
    Draw a tiled grid over a 2D classifier feature space where each tile is colored by
    the coordinate probabilities or coordinate predicted class. The X,y instances
    are drawn on top of the tiling. The decision boundaries are indicated
    by dots in between the classes. You can specify a threshold for the binary
    classification case. Misclassified instances are highlighted.

    One-variable case:
    Draw a strip plot over a 1D feature space, one strip per class. A narrow rectangle
    along the bottom indicates a color combined probabilities from all classes. The
    color associated with the most likely class will dominate the probabilities rectangle.
    Misclassified instances are highlighted. Decision boundaries, where the predicted
    class shifts from one to another, are indicated by vertical dashed lines.

    TODO: assumes classes are contiguous and 0..k-1

    :param model: an sklearn or Keras classifier model or any other model that can answer
                  method predict_proba(X)
    :param X: A 1- or 2-column dataframe or numpy array with the one or two features to plot
    :param y: The target column with integers indicating the true instance classes;
              currently these must be contiguous 0..k-1 for k classes.
    :param ntiles: How many tiles to draw across the x1, x2 feature space
    :param tile_fraction: A value between 0..1 indicating how much of a tile
                          should be colored; e.g., .9 indicates the tile should leave
                          10% whitespace around the colored portion.
    :param binary_threshold: For the k=2 binary case, the probability of class 1 at or
                             above which an instance/tile is predicted to be class 1;
                             ignored for k>2 (argmax is used instead).
    :param boundary_marker: The marker symbol from matplotlib to use for the boundary;
                            default is a circle 'o'.
    :param boundary_markersize: The boundary marker size; default is .8
    :param feature_names: A list of strings indicating the one or two X variable names.
                          If None, no axes labels are showing
    :param target_name: If showing legend, this is the title of the legend box.
    :param class_names: If showing legend, these are the class names in the legend box
    :param show: Which elements to show, includes elements from
                 ['instances','boundaries','probabilities','misclassified','legend']
    :param markers: By default, just small circles are shown for each X instance, but
                    if not None, this is a list of matplotlib marker strings like ['X','s'].
    :param fontsize: Font size for tick labels and axis labels
    :param fontname: The font name for tick labels and axis labels
    :param colors: A dictionary with adjustments to the colors
    :param dot_w: How wide should the circles be when drawing the instances
    :param yshift: For univariate case. If you'd like to play around with the strip plot,
                   this variable shifts the class clusters; a shifted zero puts them on
                   top of each other.
    :param sigma: For univariate case. The standard deviation of the noise added to make
                  the strip plot.
    :param ranges: Tuple for ranges of plot. One range per input dimension also specified as tuple,
                   e.g. ((10, 100), (500, 600)).
                   Ranges of plot are determined by min, max of X vector if not specified.
    :param figsize: optional (width, height) in inches for the entire plot
    :param ax: An optional matplotlib "axes" upon which this method should draw. If you
               send in your own figure, it should be wide but not tall like shape 4,1
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, pd.Series):
        y = y.values
    # Labels must be usable as indices into class_names (0..k-1 assumption).
    if class_names is not None and np.max(y) >= len(class_names):
        raise ValueError(f"Target label values (for now) must be 0..{len(class_names)-1} for n={len(class_names)} labels")
    # Keras models answer probabilities via predict(); everything else must
    # expose the sklearn-style predict_proba().
    if model.__class__.__module__.startswith('tensorflow.python.keras') or \
            model.__class__.__module__.startswith('keras'):
        if not (hasattr(model, 'predict') and callable(getattr(model, 'predict'))):
            raise ValueError("Keras model argument must implement method `predict()`")
    elif not(hasattr(model, 'predict_proba') and callable(getattr(model, 'predict_proba'))):
        raise ValueError("model argument must implement method `predict_proba()`")
    # Dispatch on dimensionality: 1 feature -> strip plot, 2 features -> tiled grid.
    if len(X.shape) == 1 or (len(X.shape)==2 and X.shape[1] == 1):
        decision_boundaries_univar(model=model, x=X, y=y,
                                   ntiles=ntiles,
                                   binary_threshold=binary_threshold,
                                   show=show,
                                   feature_name=feature_names[0] if feature_names is not None else None,
                                   target_name=target_name,
                                   class_names=class_names,
                                   markers=markers,
                                   fontsize=fontsize, fontname=fontname,
                                   dot_w=dot_w,
                                   sigma=sigma,
                                   yshift=yshift,
                                   colors=colors,
                                   figsize=figsize,
                                   ax=ax)
    elif len(X.shape) == 2 and X.shape[1] == 2:
        decision_boundaries_bivar(model=model, X=X, y=y,
                                  ntiles=ntiles, tile_fraction=tile_fraction,
                                  binary_threshold=binary_threshold,
                                  show=show,
                                  feature_names=feature_names, target_name=target_name,
                                  class_names=class_names,
                                  markers=markers,
                                  boundary_marker=boundary_marker,
                                  boundary_markersize=boundary_markersize,
                                  fontsize=fontsize, fontname=fontname,
                                  dot_w=dot_w, colors=colors,
                                  ranges=ranges,
                                  figsize=figsize,
                                  ax=ax)
    else:
        raise ValueError(f"Expecting 2D data not {X.shape}")
def decision_boundaries_bivar(model, X:np.ndarray, y:np.ndarray,
                              ntiles=50, tile_fraction=.9,
                              binary_threshold=0.5,
                              show=['instances','boundaries','probabilities','misclassified','legend'],
                              feature_names=None, target_name=None, class_names=None,
                              markers=None,
                              boundary_marker='o', boundary_markersize=.8,
                              fontsize=9, fontname="Arial",
                              dot_w=25, colors:dict=None,
                              ranges=None,
                              figsize=None,
                              ax=None) -> None:
    """
    Draw the two-variable (tiled grid) decision-boundary plot.

    See comment and parameter descriptions for decision_boundaries() above.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, pd.Series):
        y = y.values
    if len(X.shape)==1 or (len(X.shape)==2 and X.shape[1]!=2) or len(X.shape)>2:
        raise ValueError(f"Expecting 2D data not {X.shape}")
    if ax is None:
        if figsize:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            fig, ax = plt.subplots()
    # Created grid over the range of x1 and x2 variables, get probabilities, predictions
    # NOTE(review): x_ receives the tile *height* h from _compute_tiling and is
    # later shadowed by the per-class loop variable below; safe today because the
    # height is consumed first, but worth renaming.
    grid_points, grid_proba, grid_pred_as_matrix, w, x_, class_X, class_values = \
        _compute_tiling(model, X, y, binary_threshold, ntiles, tile_fraction, ranges=ranges)
    x_proba = _predict_proba(model, X)
    if len(np.unique(y)) == 2:  # is k=2 binary?
        X_pred = np.where(x_proba[:, 1] >= binary_threshold, 1, 0)
    else:
        X_pred = np.argmax(x_proba, axis=1)  # TODO: assumes classes are 0..k-1
    # Predicted labels grouped per true class, aligned with class_X
    class_X_pred = [X_pred[y == cl] for cl in class_values]
    if markers is None:
        markers = ['o']*len(class_X)
    colors = adjust_colors(colors)
    class_values = np.unique(y)  # returns sorted
    # Get class to color map for probabilities and predictions
    color_map, grid_pred_colors, grid_proba_colors = \
        _get_grid_colors(grid_proba, grid_pred_as_matrix, class_values, colors)
    # Draw probabilities or class prediction grid
    facecolors = grid_proba_colors if 'probabilities' in show else grid_pred_colors
    _draw_tiles(ax, grid_points, facecolors, colors['tile_alpha'], x_, w)
    # Get grid with class predictions with coordinates (x,y)
    # e.g., y_pred[0,0] is lower left pixel and y_pred[5,5] is top-right pixel
    # for npoints=5
    grid_pred_as_matrix = grid_pred_as_matrix.reshape(ntiles, ntiles)
    if 'boundaries' in show:
        _draw_boundary_edges(ax, grid_points, grid_pred_as_matrix,
                             boundary_marker, boundary_markersize,
                             colors, w, x_)
    # Draw the X instances circles
    if 'instances' in show:
        # NOTE(review): color_map is keyed by class value but indexed by loop
        # position i here — equivalent only under the 0..k-1 class assumption.
        for i, x_ in enumerate(class_X):
            if 'misclassified' in show:
                # Show correctly classified markers
                good_x = x_[class_X_pred[i] == class_values[i],:]
                ax.scatter(good_x[:, 0], good_x[:, 1],
                           s=dot_w, c=color_map[i],
                           marker=markers[i],
                           alpha=colors['scatter_marker_alpha'],
                           edgecolors=colors['scatter_edge'],
                           lw=.5)
                # Show misclassified markers (can't have alpha per marker so do in 2 calls)
                bad_x = x_[class_X_pred[i] != class_values[i],:]
                ax.scatter(bad_x[:, 0], bad_x[:, 1],
                           s=dot_w, c=color_map[i],
                           marker=markers[i],
                           alpha=1.0,
                           edgecolors=colors['warning'],
                           lw=.5)
            else:
                ax.scatter(x_[:, 0], x_[:, 1],
                           s=dot_w, c=color_map[i],
                           marker=markers[i],
                           alpha=colors['scatter_marker_alpha'],
                           edgecolors=colors['scatter_edge'],
                           lw=.5)
    _format_axes(ax,
                 feature_names[0] if feature_names is not None else None,
                 feature_names[1] if feature_names is not None else None,
                 colors, fontsize, fontname)
    if 'legend' in show:
        class_names = utils._normalize_class_names(class_names, nclasses=len(class_values))
        add_classifier_legend(ax, class_names, class_values, color_map, target_name, colors,
                              fontsize=fontsize, fontname=fontname)
def _compute_tiling(model, X:np.ndarray, y:np.ndarray, binary_threshold,
                    ntiles, tile_fraction, ranges):
    """
    Create grid over the range of x1 and x2 variables; use the model to
    compute the probabilities with model.predict_proba(), which will work with sklearn
    and, I think, XGBoost. Later we will have to figure out how to get probabilities
    out of the other models we support.

    The predictions are computed simply by picking the argmax of probabilities, which
    assumes classes are 0..k-1. TODO: update to allow disjoint integer class values
    For k=2 binary classifications, the caller's binary_threshold is applied to the
    class-1 probability instead.

    This returns all of the details needed to plot the tiles. The coordinates of
    the grid are a linear space from min to max of each variable, inclusively.
    So if the range is 1..5 and we want 5 tiles, then the width of each tile is 1.
    We get a tile at each position. When we are drawing, the position is taken as
    the center of the tile. In this case, the grid points would be centered over
    1,2,3,4, and 5.

    Returns:
        (grid_points, grid_proba, grid_pred, tile_width, tile_height,
         per-class instance arrays, sorted unique class values)
    """
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, pd.Series):
        y = y.values
    X1 = X[:, 0]
    X2 = X[:, 1]
    if ranges is not None:
        x1range, x2range = ranges
        min_x1, max_x1 = x1range
        min_x2, max_x2 = x2range
    else:
        min_x1, max_x1 = min(X1), max(X1)
        min_x2, max_x2 = min(X2), max(X2)
    x1r = max_x1 - min_x1
    x2r = max_x2 - min_x2
    border1 = x1r * 0.05  # make a 5% border
    border2 = x2r * 0.05
    # NOTE: an explicit `ranges` is also padded by the 5% border, matching the
    # data-driven case.
    x1range = (min_x1 - border1, max_x1 + border1)
    x2range = (min_x2 - border2, max_x2 + border2)
    w = (x1r + 2 * border1) / (ntiles - 1)
    h = (x2r + 2 * border2) / (ntiles - 1)
    w *= tile_fraction
    h *= tile_fraction
    # Build grid coordinates, iterating x1 (v1) fastest so the flat index maps
    # row-major onto an (ntiles, ntiles) matrix of positions.
    grid_points = []
    for v2 in np.linspace(*x2range, num=ntiles, endpoint=True):
        for v1 in np.linspace(*x1range, num=ntiles, endpoint=True):
            grid_points.append([v1, v2])
    grid_points = np.array(grid_points)
    class_values = np.unique(y)  # sorted unique class labels
    class_X = [X[y == cl] for cl in class_values]
    grid_proba = _predict_proba(model, grid_points)
    if len(class_values) == 2:  # is k=2 binary? honor the caller's threshold
        grid_pred = np.where(grid_proba[:,1]>=binary_threshold,1,0)
    else:
        grid_pred = np.argmax(grid_proba, axis=1)  # TODO: assumes classes are 0..k-1
    return grid_points, grid_proba, grid_pred, w, h, class_X, class_values
def _get_grid_colors(grid_proba, grid_pred, class_values, colors):
    """
    Return (color_map, per-tile prediction colors, per-tile probability colors)
    for the grid locations.

    The probability color of a tile is the probability-weighted sum of the class
    RGB vectors: if class 1 has probability .3 and class 2 has .7, the tile color
    is .3*color1 + .7*color2. This visually matches alpha-blending overlapping
    class regions, but keeps alpha=1 so the instance circles drawn on top are
    not washed out.
    """
    n = len(class_values)
    class_colors = np.array(colors['classes'][n])
    grid_pred_colors = class_colors[grid_pred]  # color for each prediction in grid
    color_map = {cl: class_colors[i] for i, cl in enumerate(class_values)}
    # Weight each class's RGB vector by its probability, then sum to blend.
    rgb = np.array([ImageColor.getcolor(c, mode="RGB") for c in class_colors])
    blended = (grid_proba @ rgb) / 255  # scale into [0..1]
    grid_proba_colors = [Color(rgb=c).hex for c in blended]
    return color_map, grid_pred_colors, grid_proba_colors
def _draw_tiles(ax, grid_points, facecolors, tile_alpha, h, w):
    """Draw one colored w-by-h rectangle per grid point, centered on the point."""
    tiles = [
        patches.Rectangle((v1 - w / 2, v2 - h / 2), w, h, angle=0.0,
                          linewidth=0, facecolor=facecolors[i], alpha=tile_alpha)
        for i, (v1, v2) in enumerate(grid_points)
    ]
    # A single PatchCollection is MUCH faster than repeated add_patch() calls.
    ax.add_collection(PatchCollection(tiles, match_original=True))
def _draw_boundary_edges(ax, grid_points, grid_pred_as_matrix, boundary_marker, boundary_markersize,
colors, w, h):
ntiles = grid_pred_as_matrix.shape[0]
# find transitions from one class to the other moving horizontally
dx = np.diff(grid_pred_as_matrix, axis=1)
dx = np.abs(dx)
# put a zero col vector on the left to restore size
dx = np.hstack([np.zeros((ntiles, 1)), dx])
# find transitions moving vertically, bottom to top (grid matrix is flipped vertically btw)
dy = np.diff(grid_pred_as_matrix, axis=0)
dy = np.abs(dy)
# put a zero row vector on the top to restore size
dy = np.vstack([np.zeros((1, ntiles)), dy])
dx_edge_idx = np.where(dx.reshape(-1)) # what are the indexes of dx class transitions?
dy_edge_idx = np.where(dy.reshape(-1)) # what are the indexes of dy class transitions?
dx_edges = grid_points[dx_edge_idx] # get v1,v2 coordinates of left-to-right transitions
dy_edges = grid_points[dy_edge_idx] # get v1,v2 coordinates of bottom-to-top transitions
# Plot the boundary markers in between tiles; e.g., shift dx stuff to the left half a tile
ax.plot(dx_edges[:, 0] - w / 2, dx_edges[:, 1], boundary_marker,
markersize=boundary_markersize, c=colors['class_boundary'], alpha=1.0)
ax.plot(dy_edges[:, 0], dy_edges[:, 1] - h / 2, boundary_marker,
markersize=boundary_markersize, c=colors['class_boundary'], alpha=1.0)
def decision_boundaries_univar(model, x: np.ndarray, y: np.ndarray,
                               ntiles=100,
                               binary_threshold=0.5,
                               show=['instances', 'boundaries', 'probabilities', 'misclassified', 'legend'],
                               feature_name=None, target_name=None, class_names=None,
                               markers=None,
                               fontsize=9, fontname="Arial",
                               dot_w=25,
                               yshift=.09,
                               sigma=.09,
                               colors: dict = None,
                               figsize: Tuple = None,
                               ax=None) -> None:
    """
    Draw the one-variable (strip plot) decision-boundary plot.

    See comment and parameter descriptions for decision_boundaries() above.
    """
    if ax is None:
        if figsize:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            fig, ax = plt.subplots()
    if isinstance(x, pd.Series):
        x = x.values
    if isinstance(y, pd.Series):
        y = y.values
    if (len(x.shape)==2 and x.shape[1]!=1) or len(x.shape)>2:
        raise ValueError(f"Expecting 1D data not {x.shape}")
    colors = adjust_colors(colors)
    # Vertical offset of the lowest class strip above the probability bar
    mu = 0.08
    class_values = np.unique(y)
    nclasses = len(class_values)
    class_colors = np.array(colors['classes'][nclasses])
    color_map = {v: class_colors[i] for i, v in enumerate(class_values)}
    # Evenly spaced probe points across the feature range; w is the step size
    x1r = np.max(x) - np.min(x)
    x1range = (np.min(x), np.max(x))
    grid_points, w = np.linspace(*x1range, num=ntiles, endpoint=True, retstep=True)
    grid_proba = _predict_proba(model, grid_points)
    if len(np.unique(y)) == 2:  # is k=2 binary?
        grid_pred = np.where(grid_proba[:, 1] >= binary_threshold, 1, 0)
    else:
        grid_pred = np.argmax(grid_proba, axis=1)  # TODO: assumes classes are 0..k-1
    # Nothing is plotted yet, so this is the default axis limit
    ymax = ax.get_ylim()[1]
    # compute the stripes on the bottom showing probabilities
    if 'probabilities' in show:
        class_values = np.unique(y)
        color_map, grid_pred_colors, grid_proba_colors = \
            _get_grid_colors(grid_proba, grid_pred, class_values, colors=adjust_colors(None))
        pred_box_height = .08 * ymax
        boxes = []
        for i, gx in enumerate(grid_points):
            rect = patches.Rectangle((gx, 0), w, pred_box_height,
                                     edgecolor='none', facecolor=grid_proba_colors[i],
                                     alpha=colors['tile_alpha'])
            boxes.append(rect)
        # drop box around the gradation
        ax.add_collection(PatchCollection(boxes, match_original=True))
        rect = patches.Rectangle((grid_points[0], 0), x1r + w, pred_box_height, linewidth=.3,
                                 edgecolor=colors['rect_edge'], facecolor='none')
        ax.add_patch(rect)
    if 'boundaries' in show:
        # Dashed vertical line wherever the predicted class changes between probes
        dx = np.abs(np.diff(grid_pred))
        dx = np.hstack([0, dx])
        dx_edge_idx = np.where(dx)  # indexes of dx class transitions?
        for lx in grid_points[dx_edge_idx]:
            ax.plot([lx, lx], [*ax.get_ylim()], '--', lw=.3,
                    c=colors['split_line'], alpha=1.0)
    if 'instances' in show:
        # user should pass in short and wide fig
        x_proba = _predict_proba(model, x)
        if len(np.unique(y)) == 2:  # is k=2 binary?
            x_pred = np.where(x_proba[:, 1] >= binary_threshold, 1, 0)
        else:
            x_pred = np.argmax(x_proba, axis=1)  # TODO: assumes classes are 0..k-1
        class_x = [x[y == cl] for cl in class_values]
        class_x_pred = [x_pred[y == cl] for cl in class_values]
        if markers is None:
            markers = ['o'] * len(class_x)
        # NOTE(review): the jitter noise is drawn with mean mu (not 0), so each
        # strip actually sits near 2*mu + i*yshift; presumably intentional
        # spacing — confirm before changing.
        for i, x_, in enumerate(class_x):
            if 'misclassified' in show:
                # Show correctly classified markers
                good_x = x_[class_x_pred[i] == class_values[i]]
                noise = np.random.normal(mu, sigma, size=len(good_x))
                ax.scatter(good_x, [mu + i * yshift] * len(good_x) + noise,
                           s=dot_w, c=color_map[i],
                           marker=markers[i],
                           alpha=colors['scatter_marker_alpha'],
                           edgecolors=colors['scatter_edge'],
                           lw=.5)
                # Show misclassified markers (can't have alpha per marker so do in 2 calls)
                bad_x = x_[class_x_pred[i] != class_values[i]]
                noise = np.random.normal(mu, sigma, size=len(bad_x))
                ax.scatter(bad_x, [mu + i * yshift] * len(bad_x) + noise,
                           s=dot_w, c=color_map[i],
                           marker=markers[i],
                           alpha=1.0,
                           edgecolors=colors['warning'],
                           lw=.5)
            else:
                noise = np.random.normal(mu, sigma, size=len(x_))
                ax.scatter(x_, [mu + i * yshift] * len(x_) + noise,
                           s=dot_w, c=color_map[i],
                           marker=markers[i],
                           alpha=colors['scatter_marker_alpha'],
                           edgecolors=colors['scatter_edge'],
                           lw=.5)
    _format_axes(ax, feature_name if feature_name is not None else None, None, colors, fontsize, fontname)
    # Strip plots have no meaningful y axis; hide it
    ax.spines['left'].set_visible(False)
    ax.set_yticks([])
    ax.set_ylim(0, mu + nclasses * yshift + 6*sigma)
    if 'legend' in show:
        class_names = utils._normalize_class_names(class_names, nclasses)
        add_classifier_legend(ax, class_names, class_values, color_map, target_name, colors,
                              fontsize=fontsize, fontname=fontname)
def _predict_proba(model, X):
"""
This is where we figure out how to get a matrix of k probabilities for a k-class
classification problem. It works with any model that answers predict_proba()
but we can add special cases such as Keras, that has deprecated that method.
"""
if len(X.shape)==1:
X = X.reshape(-1,1)
# Keras wants predict not predict_proba and still gives probabilities
if model.__class__.__module__.startswith('tensorflow.python.keras') or \
model.__class__.__module__.startswith('keras'):
proba = model.predict(X)
if proba.shape[1]==1:
proba = np.hstack([1-proba,proba]) # get prob y=0, y=1 nx2 matrix like sklearn
return proba
# sklearn etc...
return model.predict_proba(X)
================================================
FILE: dtreeviz/colors.py
================================================
import matplotlib
import numpy as np
# Named hex colors reused throughout dtreeviz's plots and tree diagrams.
YELLOW = '#fefecd'
GREEN = '#cfe2d4'
DARKBLUE = '#313695'
BLUE = '#4575b4'
DARKGREEN = '#006400'
LIGHTORANGE = '#fee090'
LIGHTBLUE = '#a6bddb'
GREY = '#444443'
# Wedge markers (the small split-position indicators) share the base grey.
WEDGE_COLOR = GREY
# Colors distinguishing the left/right sides of a categorical split node.
CATEGORICAL_SPLIT_LEFT = '#FFC300'
CATEGORICAL_SPLIT_RIGHT = BLUE
HIGHLIGHT_COLOR = '#D67C03'
# Class-color palettes indexed by class count: entry i is a list of i hex
# colors for an i-class problem (entries 0 and 1 are None; no palette needed).
color_blind_friendly_colors = [
    None,  # 0 classes
    None,  # 1 class
    ['#FEFEBB', '#a1dab4'],  # 2 classes
    ['#FEFEBB', '#D9E6F5', '#a1dab4'],  # 3 classes
    ['#FEFEBB', '#D9E6F5', '#a1dab4', LIGHTORANGE],  # 4
    ['#FEFEBB', '#D9E6F5', '#a1dab4', '#41b6c4', LIGHTORANGE],  # 5
    ['#FEFEBB', '#c7e9b4', '#41b6c4', '#2c7fb8', LIGHTORANGE, '#f46d43'],  # 6
    ['#FEFEBB', '#c7e9b4', '#7fcdbb', '#41b6c4', '#225ea8', '#fdae61', '#f46d43'],  # 7
    ['#FEFEBB', '#edf8b1', '#c7e9b4', '#7fcdbb', '#1d91c0', '#225ea8', '#fdae61', '#f46d43'],  # 8
    ['#FEFEBB', '#c7e9b4', '#41b6c4', '#74add1', BLUE, DARKBLUE, LIGHTORANGE, '#fdae61', '#f46d43'],  # 9
    ['#FEFEBB', '#c7e9b4', '#41b6c4', '#74add1', BLUE, DARKBLUE, LIGHTORANGE, '#fdae61', '#f46d43', '#d73027']  # 10
]
# Same indexing scheme but drawn from matplotlib's default cycle ('C0'..'C9').
mpl_colors = [
    None,  # 0 classes
    None,  # 1 class
] + [
    [f'C{i}' for i in range(0, n_classes)] for n_classes in range(2, 11)
]
def _discrete_cmap(cmap_name, n):
    """
    Return colormap `cmap_name` resampled down to n discrete colors.

    matplotlib.cm.get_cmap(name, lut) was deprecated in matplotlib 3.7 and
    removed in 3.9; prefer the colormap registry plus Colormap.resampled()
    and fall back to the legacy API on older matplotlib versions.
    """
    try:
        return matplotlib.colormaps[cmap_name].resampled(n)
    except AttributeError:
        # matplotlib < 3.6: registry/resampled API not available yet
        return matplotlib.cm.get_cmap(cmap_name, n)


def get_hex_colors(n_classes, cmap_name="RdYlBu"):
    """
    Will generate a list of lists that contain n discrete hex colors
    from a given matplotlib colormap based on the number of classes in
    a given classifier model as determined in trees.py. Defaults to the
    "RdYlBu" colormap.
    For backward compatibility with the color_blind_friendly_colors, the first 10 lists will be populated with values
    from color_blind_friendly_colors list.
    Args:
        n_classes (int): the number of classes in a classifier model as determined
            by trees.py
        cmap_name (str, optional): any valid matplotlib colormap. Defaults to "RdYlBu".
    Returns:
        list: a list of lists where each inner list item contains n discrete hex colors.
    """
    hex_colors = color_blind_friendly_colors.copy()
    if n_classes:
        # Only generate palettes beyond what the precomputed list covers.
        for i in range(len(color_blind_friendly_colors), n_classes + 1):
            cmap = _discrete_cmap(cmap_name, i)
            hex_colors.append(
                [
                    matplotlib.colors.to_hex(rgb, keep_alpha=True)
                    for rgb in cmap(np.arange(0, cmap.N))
                ]
            )
    return hex_colors
# Default color/style settings shared by every dtreeviz visualization.
# Individual entries can be overridden by passing a partial dict through a
# function's `colors` parameter (merged by adjust_colors()).
COLORS = {'scatter_edge': GREY,
          'scatter_marker': BLUE,
          'scatter_marker_alpha': 0.7,
          'class_boundary': GREY,
          'warning': '#E9130D',
          'tile_alpha': 0.8,  # square tiling in decision_boundaries to show probabilities
          'tessellation_alpha': 0.3,  # rectangular regions for decision tree feature space partitioning
          'tessellation_alpha_3D': 0.5,
          'split_line': GREY,
          'mean_line': '#f46d43',
          'axis_label': GREY,
          'axis': GREY,
          'title': GREY,
          'legend_title': GREY,
          'legend_edge': GREY,
          'edge': GREY,
          'color_map_min': '#c7e9b4',
          'color_map_max': '#081d58',
          'classes': color_blind_friendly_colors,
          'rect_edge': GREY,
          'text': GREY,
          'highlight': HIGHLIGHT_COLOR,
          'wedge': WEDGE_COLOR,
          'text_wedge': WEDGE_COLOR,
          'arrow': GREY,
          'larrow': GREY,
          'rarrow': GREY,
          'node_label': GREY,
          'tick_label': GREY,
          'leaf_label': GREY,
          'pie': GREY,
          'hist_bar': LIGHTBLUE,
          'categorical_split_left': CATEGORICAL_SPLIT_LEFT,
          'categorical_split_right': CATEGORICAL_SPLIT_RIGHT
          }
def adjust_colors(colors, n_classes=None, cmp="RdYlBu"):
    """
    Merge user-supplied color overrides into the default COLORS palette.

    :param colors: dict of COLORS keys to override, or None for the defaults
    :param n_classes: number of classes in the model; when it exceeds the
        precomputed color_blind_friendly_colors palettes, extra per-class
        palettes are generated from the `cmp` colormap
    :param cmp: matplotlib colormap name used to generate the extra palettes
    :return: a new dict; the module-level COLORS default is never mutated
    """
    # Work on a copy: the previous implementation wrote the extended class
    # palettes back into the module-level COLORS dict (a hidden global
    # side effect) and skipped the extension entirely when a partial
    # `colors` dict was supplied.
    adjusted = dict(COLORS)
    if n_classes and n_classes > len(color_blind_friendly_colors) - 1:
        # More classes than the precomputed palettes can handle: generate more.
        adjusted["classes"] = get_hex_colors(n_classes, cmp)
    if colors is not None:
        # Caller overrides win, same precedence as dict(COLORS, **colors).
        adjusted.update(colors)
    return adjusted
================================================
FILE: dtreeviz/compatibility.py
================================================
# Functions to support backward compatibility to pre-2.0 API
import warnings
from numbers import Number
from typing import Mapping, List
import numpy as np
import pandas as pd
from sklearn import tree
from dtreeviz.models.shadow_decision_tree import ShadowDecTree
from dtreeviz.utils import myround, DTreeVizRender
from dtreeviz.trees import DTreeVizAPI
def _warning_on_one_line(message, category, filename, lineno, file=None, line=None):
return '%s:%s: %s: %s\n' % (filename, lineno, category.__name__, message)
warnings.formatwarning = _warning_on_one_line
def rtreeviz_univar(tree_model,
                    X_train: (pd.DataFrame, np.ndarray) = None,  # dataframe with only one column
                    y_train: (pd.Series, np.ndarray) = None,
                    feature_names: List[str] = None,
                    target_name: str = None,
                    tree_index: int = None,  # required in case of tree ensemble
                    ax=None,
                    fontsize: int = 10,
                    show={'title', 'splits'},
                    split_linewidth=.5,
                    mean_linewidth=2,
                    markersize=15,
                    colors=None):
    """
    Deprecated pre-2.0 entry point: draw the 1D feature space of a univariate
    regression tree.  Use dtreeviz.model(...).rtree_feature_space(...) instead.
    """
    warnings.warn("rtreeviz_univar() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.rtree_feature_space(...)",
                  DeprecationWarning, stacklevel=2)
    # Accept a bare string for the single feature's name.
    if isinstance(feature_names, str):
        feature_names = [feature_names]
    stree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train,
                                          feature_names, target_name, None, tree_index)
    api = DTreeVizAPI(stree)
    api.rtree_feature_space(ax=ax, fontsize=fontsize, show=show,
                            split_linewidth=split_linewidth,
                            mean_linewidth=mean_linewidth,
                            markersize=markersize, colors=colors)
def rtreeviz_bivar_heatmap(tree_model,
                           X_train: (pd.DataFrame, np.ndarray) = None,  # only the 2 features used for the split
                           y_train: (pd.Series, np.ndarray) = None,
                           feature_names: List[str] = None,
                           target_name: str = None,
                           tree_index: int = None,  # required in case of tree ensemble
                           ax=None,
                           fontsize=10, ticks_fontsize=12, fontname="Arial",
                           show={'title'},
                           n_colors_in_map=100,
                           colors=None,
                           markersize=15
                           ) -> None:
    """
    Show tesselated 2D feature space for bivariate regression tree. X_train can
    have lots of features but features lists indexes of 2 features to train tree with.

    Deprecated since 2.0; use dtreeviz.model(...).rtree_feature_space(...) instead.

    NOTE: the historical signature declared a ``tree.DecisionTreeClassifier``
    return type, but this function has never returned anything (it draws into
    ``ax``); the annotation is now ``None`` to match the actual behavior.
    """
    warnings.warn("rtreeviz_bivar_heatmap() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.rtree_feature_space(...)",
                  DeprecationWarning, stacklevel=2)
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train, feature_names, target_name, None,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    model.rtree_feature_space(ax=ax, fontsize=fontsize, ticks_fontsize=ticks_fontsize, fontname=fontname, show=show,
                              n_colors_in_map=n_colors_in_map, colors=colors, markersize=markersize)
def rtreeviz_bivar_3D(tree_model,
                      X_train: (pd.DataFrame, np.ndarray) = None,  # only the 2 features used for the split
                      y_train: (pd.Series, np.ndarray) = None,
                      feature_names: List[str] = None,
                      target_name: str = None,
                      class_names: (Mapping[Number, str], List[str]) = None,  # required if classifier,
                      tree_index: int = None,  # required in case of tree ensemble
                      ax=None,
                      fontsize=10, ticks_fontsize=10, fontname="Arial",
                      azim=0, elev=0, dist=7,
                      show={'title'},
                      colors=None,
                      markersize=15,
                      n_colors_in_map=100
                      ) -> None:
    """
    Show 3D feature space for bivariate regression tree. X_train should have
    just the 2 variables used for training.

    Deprecated since 2.0; use dtreeviz.model(...).rtree_feature_space3D(...) instead.

    NOTE: the historical signature declared a ``tree.DecisionTreeClassifier``
    return type, but this function has never returned anything (it draws into
    ``ax``); the annotation is now ``None`` to match the actual behavior.
    """
    warnings.warn("rtreeviz_bivar_3D() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.rtree_feature_space3D(...)",
                  DeprecationWarning, stacklevel=2)
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train, feature_names, target_name, class_names,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    model.rtree_feature_space3D(ax, fontsize, ticks_fontsize, fontname,
                                azim, elev, dist, show, colors, markersize, n_colors_in_map)
def ctreeviz_univar(tree_model,
                    X_train: (pd.DataFrame, np.ndarray) = None,  # dataframe with only one column
                    y_train: (pd.Series, np.ndarray) = None,
                    feature_names: List[str] = None,
                    target_name: str = None,
                    class_names: (Mapping[Number, str], List[str]) = None,  # required if classifier,
                    tree_index: int = None,  # required in case of tree ensemble
                    fontsize=10, fontname="Arial", nbins=25, gtype='strip',
                    show={'title', 'legend', 'splits'},
                    colors=None,
                    ax=None):
    """
    Deprecated pre-2.0 entry point: draw the 1D feature space of a univariate
    classification tree.  Use dtreeviz.model(...).ctree_feature_space(...) instead.
    """
    warnings.warn("ctreeviz_univar() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.ctree_feature_space(...)",
                  DeprecationWarning, stacklevel=2)
    # Accept a bare string for the single feature's name.
    if isinstance(feature_names, str):
        feature_names = [feature_names]
    stree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train,
                                          feature_names, target_name, class_names, tree_index)
    api = DTreeVizAPI(stree)
    api.ctree_feature_space(fontsize, fontname, nbins, gtype, show, colors, ax)
def ctreeviz_bivar(tree_model,
                   X_train: (pd.DataFrame, np.ndarray) = None,  # dataframe with only one column
                   y_train: (pd.Series, np.ndarray) = None,
                   feature_names: List[str] = None,
                   target_name: str = None,
                   class_names: (Mapping[Number, str], List[str]) = None,  # required if classifier,
                   tree_index: int = None,  # required in case of tree ensemble
                   fontsize=10,
                   fontname="Arial",
                   show={'title', 'legend', 'splits'},
                   colors=None,
                   ax=None):
    """
    Show tesselated 2D feature space for bivariate classification tree. X_train can
    have lots of features but features lists indexes of 2 features to train tree with.

    Deprecated pre-2.0 entry point; use dtreeviz.model(...).ctree_feature_space(...)
    instead.
    """
    warnings.warn("ctreeviz_bivar() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.ctree_feature_space(...)",
                  DeprecationWarning, stacklevel=2)
    # Accept a bare string naming a single feature.
    if isinstance(feature_names, str):
        feature_names = [feature_names]
    stree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train,
                                          feature_names, target_name, class_names, tree_index)
    api = DTreeVizAPI(stree)
    api.ctree_feature_space(fontsize=fontsize,
                            fontname=fontname,
                            show=show,
                            colors=colors,
                            ax=ax)
def dtreeviz(tree_model,
             X_train: (pd.DataFrame, np.ndarray) = None,
             y_train: (pd.DataFrame, np.ndarray) = None,
             feature_names: List[str] = None,
             target_name: str = None,
             class_names: (Mapping[Number, str], List[str]) = None,  # required if classifier,
             tree_index: int = None,  # required in case of tree ensemble,
             precision: int = 2,
             orientation: ('TD', 'LR') = "TD",
             instance_orientation: ("TD", "LR") = "LR",
             show_root_edge_labels: bool = True,
             show_node_labels: bool = False,
             show_just_path: bool = False,
             fancy: bool = True,
             histtype: ('bar', 'barstacked', 'strip') = 'barstacked',
             highlight_path: List[int] = [],
             X: np.ndarray = None,
             max_X_features_LR: int = 10,
             max_X_features_TD: int = 20,
             depth_range_to_display: tuple = None,
             label_fontsize: int = 12,
             ticks_fontsize: int = 8,
             fontname: str = "Arial",
             title: str = None,
             title_fontsize: int = 10,
             colors: dict = None,
             scale=1.0
             ) \
        -> DTreeVizRender:
    """
    Given a decision tree regressor or classifier, create and return a tree visualization
    using the graphviz (DOT) language.

    Deprecated since 2.0; use dtreeviz.model(...).view() instead.

    We can call this function in two ways :
    1. by using shadow tree
        ex. dtreeviz(shadow_dtree)
        - we need to initialize shadow_tree before this call
            - ex. shadow_dtree = ShadowSKDTree(tree_model, dataset[features], dataset[target], features, target, [0, 1]))
        - the main advantage is that we can use the shadow_tree for other visualisations methods as well
    2. by using sklearn, xgboost tree
        ex. dtreeviz(tree_model, dataset[features], dataset[target], features, target, class_names=[0, 1])
        - maintain backward compatibility

    :param tree_model: A DecisionTreeRegressor or DecisionTreeClassifier that has been
                       fit to X_train, y_train.
    :param X_train: A data frame or 2-D matrix of feature vectors used to train the model.
    :param y_train: A pandas Series or 1-D vector with target or classes values. These values should be numeric types.
    :param feature_names: A list of the feature names.
    :param target_name: The name of the target variable.
    :param class_names: [For classifiers] A dictionary or list of strings mapping class
                        value to class name.
    :param tree_index: Required in case of a tree ensemble; index of the tree to visualize.
    :param precision: When displaying floating-point numbers, how many digits to display
                      after the decimal point. Default is 2.
    :param orientation: Is the tree top down, "TD", or left to right, "LR"?
    :param instance_orientation: table orientation (TD, LR) for showing feature prediction's values.
    :param show_root_edge_labels: Include < and >= on the edges emanating from the root?
    :param show_node_labels: Add "Node id" to top of each node in graph for educational purposes
    :param show_just_path: If True, it shows only the sample(X) prediction path
    :param fancy: Passed through to DTreeVizAPI.view(); presumably toggles the detailed
                  per-node charts — see view() for the exact effect (TODO confirm).
    :param histtype: [For classifiers] Either 'bar' or 'barstacked' to indicate
                     histogram type. We find that 'barstacked' looks great up to about.
                     four classes.
    :param highlight_path: A list of node IDs to highlight, default is [].
                           Useful for emphasizing node(s) in tree for discussion.
                           If X argument given then this is ignored.
    :type highlight_path: List[int]
    :param X: Instance to run down the tree; derived path to highlight from this vector.
              Show feature vector with labels underneath leaf reached. highlight_path
              is ignored if X is not None.
    :type X: np.ndarray
    :param label_fontsize: Size of the label font
    :param ticks_fontsize: Size of the tick font
    :param fontname: Font which is used for labels and text
    :param max_X_features_LR: If len(X) exceeds this limit for LR layout,
                            display only those features
                           used to guide X vector down tree. Helps when len(X) is large.
                           Default is 10.
    :param max_X_features_TD: If len(X) exceeds this limit for TD layout,
                            display only those features
                           used to guide X vector down tree. Helps when len(X) is large.
                           Default is 20.
    :param depth_range_to_display: range of depth levels to be displayed. The range values are inclusive
    :param title: An optional title placed at the top of the tree.
    :param title_fontsize: Size of the text for the title.
    :param colors: dict of color overrides used for plotting; passed through to view().
    :param scale: Default is 1.0. Scale the width, height of the overall SVG preserving aspect ratio
    :return: A string in graphviz DOT language that describes the decision tree.
    """
    warnings.warn("dtreeviz() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.view()",
                  DeprecationWarning, stacklevel=2)
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train, feature_names, target_name, class_names,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    return model.view(precision, orientation,
                      instance_orientation,
                      show_root_edge_labels, show_node_labels, show_just_path, fancy, histtype, highlight_path, X,
                      max_X_features_LR, max_X_features_TD, depth_range_to_display, label_fontsize, ticks_fontsize,
                      fontname, title, title_fontsize, colors=colors, scale=scale)
def viz_leaf_samples(tree_model,
                     X_train: (pd.DataFrame, np.ndarray) = None,
                     feature_names: List[str] = None,
                     tree_index: int = None,  # required in case of tree ensemble
                     display_type: str = "plot",
                     colors: dict = None,
                     fontsize: int = 10,
                     fontname: str = "Arial",
                     grid: bool = False,
                     bins: int = 10,
                     min_samples: int = 0,
                     max_samples: int = None,
                     figsize: tuple = None,
                     ax=None):
    """Visualize the number of data samples from each leaf.

    Deprecated since 2.0; use dtreeviz.model(...).leaf_sizes() instead.

    Interpreting leaf samples can help us to see how the data is spread over the tree:
    - if we have a leaf with many samples and a good impurity, it means that we can be pretty confident
    on its prediction.
    - if we have a leaf with few samples and a good impurity, we cannot be very confident on its prediction and
    it could be a sign of overfitting.
    - by visualizing leaf samples, we can easily discover important leaves. Using describe_node_sample() function we
    can take all its samples and discover common patterns between leaf samples.
    - if the tree contains a lot of leaves and we want a general overview about leaves samples, we can use the
    parameter display_type='hist' to display the histogram of leaf samples.

    There is the option to filter the leaves with samples between 'min_samples' and 'max_samples'. This is helpful
    especially when you want to investigate leaves with number of samples from a specific range.

    We can call this function in two ways :
    1. by using shadow tree
        ex. viz_leaf_samples(shadow_dtree)
        - we need to initialize shadow_tree before this call
            - ex. shadow_dtree = ShadowSKDTree(tree_model, dataset[features], features)
        - the main advantage is that we can use the shadow_tree for other visualisations methods as well
    2. by using sklearn, xgboost tree
        ex. viz_leaf_samples(tree_model, dataset[features], features)
        - maintain backward compatibility

    TODO : put a link with notebook examples (at each function docs)

    This method contains three types of visualizations:
    - If display_type = 'plot' it will show leaf samples using a plot.
    - If display_type = 'text' it will show leaf samples as plain text. This method is preferred if number
    of leaves is very large and the plot become very big and hard to interpret.
    - If display_type = 'hist' it will show leaf sample histogram. Useful when you want to easily see the general
    distribution of leaf samples.

    Note : If X_train is the dataset used to train the model, then we will investigate the tree model
    as it was trained. We can give another X_train dataset, ex. a validation dataset, to see how the new data is
    spread over the tree.

    :param tree_model: tree.DecisionTreeRegressor, tree.DecisionTreeClassifier, xgboost.core.Booster,
                dtreeviz.models.sklearn_decision_trees.ShadowSKDTree,
                dtreeviz.models.xgb_decision_trees.ShadowXGBDTree
        The tree model or dtreeviz shadow tree model to interpret
    :param X_train: pd.DataFrame, np.ndarray
        The dataset based on which we want to make this visualisation.
    :param feature_names: List[str], optional
        The list of feature variable's name
    :param tree_index: int, optional
        Required in case of tree ensemble. Specify the tree index to interpret.
    :param display_type: str, optional
        'plot', 'text', 'hist'
    :param colors: dict
        The set of colors used for plotting
    :param fontsize: int
        Plot labels font size
    :param fontname: str
        Plot labels font name
    :param grid: bool
        True if we want to display the grid lines on the visualization
    :param bins: int
        Number of histogram bins
    :param min_samples: int
        Min number of samples for a leaf
    :param max_samples: int
        Max number of samples for a leaf
    :param figsize: optional (width, height) in inches for the entire plot
    :param ax: optional matplotlib "axes" to draw into
    """
    warnings.warn("viz_leaf_samples() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.leaf_sizes()",
                  DeprecationWarning, stacklevel=2)
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, None, feature_names, None, None,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    model.leaf_sizes(display_type, colors, fontsize,
                     fontname, grid, bins, min_samples, max_samples, figsize, ax)
def viz_leaf_criterion(tree_model,
                       tree_index: int = None,  # required in case of tree ensemble,
                       display_type: str = "plot",
                       colors: dict = None,
                       fontsize: int = 10,
                       fontname: str = "Arial",
                       grid: bool = False,
                       bins: int = 10,
                       figsize: tuple = None,
                       ax=None):
    """Visualize leaves criterion.

    Deprecated since 2.0; use dtreeviz.model(...).leaf_purity() instead.

    The most common criterion/impurity for tree regressors is “mse”, “friedman_mse”, “mae” and for tree classifiers are
    "gini" and "entropy". This information shows the leaf performance/confidence for its predictions, namely how pure or
    impure are the samples from each leaf. Each leaf performance, in the end, will determine the general tree performance.

    This visualisation can be used together with viz_leaf_samples() for a better leaf interpretation. For example,
    a leaf with good confidence, but few samples, can be a sign of overfitting. The best scenario would be to have a
    leaf with good confidence and also a lot of samples.

    We can call this function in two ways :
    1. by using shadow tree
        ex. viz_leaf_criterion(shadow_dtree)
        - we need to initialize shadow_tree before this call
            - ex. shadow_dtree = ShadowSKDTree(tree_model, dataset[features], dataset[target], features, target, [0, 1])
        - the main advantage is that we can use the shadow_tree for other visualisations methods as well
    2. by using sklearn, xgboost tree
        ex. viz_leaf_criterion(tree_model)
        - maintain backward compatibility

    This method contains three types of visualizations:
    - a plot bar visualisations for each leaf criterion, when we want to interpret individual leaves
    - a hist visualizations with leaf criterion, when we want to have a general overview for all leaves
    - a text visualisations, useful when number of leaves is very large and visual interpretation becomes difficult.

    :param tree_model: tree.DecisionTreeRegressor, tree.DecisionTreeClassifier, xgboost.core.Booster,
                dtreeviz.models.sklearn_decision_trees.ShadowSKDTree,
                dtreeviz.models.xgb_decision_trees.ShadowXGBDTree
        The tree model or dtreeviz shadow tree model to interpret
    :param tree_index: int, optional
        Required in case of tree ensemble. Specify the tree index to interpret.
    :param display_type: str, optional
        'plot', 'text', 'hist'
    :param colors: dict
        The set of colors used for plotting
    :param fontsize: int
        Plot labels font size
    :param fontname: str
        Plot labels font name
    :param grid: bool
        True if we want to display the grid lines on the visualization
    :param bins: int
        Number of histogram bins
    :param figsize: optional (width, height) in inches for the entire plot
    :param ax: optional matplotlib "axes" to draw into
    """
    warnings.warn("viz_leaf_criterion() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.leaf_purity()",
                  DeprecationWarning, stacklevel=2)
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, None, None, None, None, None,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    model.leaf_purity(display_type, colors, fontsize, fontname, grid, bins, figsize, ax)
def ctreeviz_leaf_samples(tree_model,
                          X_train: (pd.DataFrame, np.ndarray) = None,
                          y_train: (pd.DataFrame, np.ndarray) = None,
                          feature_names: List[str] = None,
                          tree_index: int = None,  # required in case of tree ensemble,
                          display_type: str = "plot",
                          plot_ylim: int = None,
                          colors: dict = None,
                          fontsize: int = 10,
                          fontname: str = "Arial",
                          grid: bool = False,
                          figsize: tuple = None,
                          ax=None):
    """Visualize the number of data samples by class for each leaf.

    Deprecated since 2.0; use dtreeviz.model(...).ctree_leaf_distributions() instead.

    It's a good way to see how classes are distributed in leaves. For example, you can observe that in some
    leaves all the samples belong only to one class, or that in other leaves the distribution of classes is almost
    50/50.
    You could get all the samples from these leaves and look over/understand what they have in common. Now, you
    can understand your data in a model driven way.
    Right now it supports only binary classification decision trees.

    We can call this function in two ways :
    1. by using shadow tree
        ex. ctreeviz_leaf_samples(shadow_dtree)
        - we need to initialize shadow_tree before this call
            - ex. shadow_dtree = ShadowSKDTree(tree_model, dataset[features], dataset[target], features, target, [0, 1])
        - the main advantage is that we can use the shadow_tree for other visualisations methods as well
    2. by using sklearn, xgboost tree
        ex. ctreeviz_leaf_samples(tree_classifier, dataset[features], dataset[target], features)
        - maintain backward compatibility

    :param tree_model: tree.DecisionTreeClassifier, xgboost.core.Booster,
                dtreeviz.models.sklearn_decision_trees.ShadowSKDTree,
                dtreeviz.models.xgb_decision_trees.ShadowXGBDTree
        The tree model or dtreeviz shadow tree model to interpret
    :param X_train: pd.DataFrame, np.ndarray
        The dataset based on which we want to make this visualisation.
    :param y_train: pd.Series, np.ndarray
        Target variable
    :param feature_names: List[str], optional
        The list of feature variable's name
    :param tree_index: int, optional
        Required in case of tree ensemble. Specify the tree index to interpret.
    :param display_type: str, optional
        'plot' or 'text'
    :param plot_ylim: int, optional
        The max value for oY. This is useful in case we have few leaves with big sample values which 'shadow'
        the other leaves values.
    :param colors: dict
        The set of colors used for plotting
    :param fontsize: int
        Plot labels fontsize
    :param fontname: str
        Plot labels font name
    :param grid: bool
        True if we want to display the grid lines on the visualization
    :param figsize: optional (width, height) in inches for the entire plot
    :param ax: optional matplotlib "axes" to draw into
    """
    warnings.warn("ctreeviz_leaf_samples() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.ctree_leaf_distributions()",
                  DeprecationWarning, stacklevel=2)
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train, feature_names, None, None,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    model.ctree_leaf_distributions(display_type, plot_ylim, colors, fontsize, fontname, grid, figsize, ax)
def viz_leaf_target(tree_model,
                    X_train: (pd.DataFrame, np.ndarray) = None,
                    y_train: (pd.DataFrame, np.ndarray) = None,
                    feature_names: List[str] = None,
                    target_name: str = None,
                    tree_index: int = None,  # required in case of tree ensemble,
                    show_leaf_labels: bool = True,
                    colors: dict = None,
                    markersize: int = 50,
                    label_fontsize: int = 10,
                    fontname: str = "Arial",
                    precision: int = 1,
                    grid: bool = False,
                    prediction_line_width: int = 2,
                    figsize: tuple = None,
                    ax=None):
    """Visualize leaf target distribution for regression decision trees.

    Deprecated since 2.0; use dtreeviz.model(...).rtree_leaf_distributions() instead.

    We can call this function in two ways :
    1. by using shadow tree
        ex. viz_leaf_target(shadow_dtree)
        - we need to initialize shadow_tree before this call
            - ex. shadow_dtree = ShadowSKDTree(tree_model, dataset[features], dataset[target], features, target)
        - the main advantage is that we can use the shadow_tree for other visualisations methods as well
    2. by using sklearn, xgboost tree
        ex. viz_leaf_target(tree_model, dataset[features], dataset[target], features, target)
        - maintain backward compatibility

    :param tree_model: tree.DecisionTreeRegressor, xgboost.core.Booster,
                dtreeviz.models.sklearn_decision_trees.ShadowSKDTree,
                dtreeviz.models.xgb_decision_trees.ShadowXGBDTree
        The tree model or dtreeviz shadow tree model to interpret
    :param X_train: pd.DataFrame, np.ndarray
        The dataset based on which we want to make this visualisation.
    :param y_train: pd.Series, np.ndarray
        Target variable values
    :param feature_names: List[str], optional
        The list of feature variable's name
    :param target_name: str, optional
        The name of target variable
    :param tree_index: int, optional
        Required in case of tree ensemble. Specify the tree index to interpret.
    :param show_leaf_labels: bool
        True if the plot should contains the leaf labels on x ax, False otherwise.
    :param colors: dict
        The set of colors used for plotting
    :param markersize: int
        Marker size in points.
    :param label_fontsize: int
        Plot labels font size
    :param fontname: str
        Plot labels font name
    :param precision: int
        When displaying floating-point numbers, how many digits to display after the decimal point. Default is 1.
    :param grid: bool
        True if we want to display the grid lines on the visualization
    :param prediction_line_width: int
        The width of prediction line.
    :param figsize: optional (width, height) in inches for the entire plot
    :param ax: optional matplotlib "axes" to draw into
    """
    warnings.warn("viz_leaf_target() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.rtree_leaf_distributions()",
                  DeprecationWarning, stacklevel=2)
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train, feature_names, target_name, None,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    model.rtree_leaf_distributions(show_leaf_labels,
                                   colors, markersize, label_fontsize, fontname, precision, grid,
                                   prediction_line_width, figsize, ax)
def describe_node_sample(tree_model,
                         node_id: int,
                         X_train: (pd.DataFrame, np.ndarray) = None,
                         feature_names: List[str] = None,
                         tree_index: int = None,  # required in case of tree ensemble
                         ):
    """Generate stats (count, mean, std, etc) based on data samples from a specified node.

    Deprecated pre-2.0 entry point; use dtreeviz.model(...).node_stats() instead.

    Especially useful for investigating leaf samples of a decision tree: a way to
    discover data patterns, better understand the tree model, and get new ideas
    for feature generation.

    We can call this function in two ways :
    1. by using shadow tree
        ex. describe_node_sample(shadow_dtree, node_id=10)
        - we need to initialize shadow_tree before this call
            - ex. shadow_dtree = ShadowSKDTree(tree_model, dataset[features], dataset[target], features, target)
        - the main advantage is that we can use the shadow_tree for other visualisations methods as well
    2. by using sklearn, xgboost tree
        ex. describe_node_sample(tree_classifier, node_id=1, X_train=dataset[features], feature_names=features)
        - maintain backward compatibility

    :param tree_model: the tree model or dtreeviz shadow tree model to interpret
        (tree.DecisionTreeRegressor, tree.DecisionTreeClassifier, xgboost.core.Booster,
        ShadowSKDTree, ShadowXGBDTree)
    :param node_id: int, node id to interpret
    :param X_train: pd.DataFrame or np.ndarray the visualisation is based on
    :param feature_names: List[str], optional, the feature variable names
    :param tree_index: int, optional, required for tree ensembles
    :return: pd.DataFrame with the node's training samples' stats
    """
    warnings.warn("describe_node_sample() function is deprecated starting from version 2.0. \n "
                  "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.node_stats()",
                  DeprecationWarning, stacklevel=2)
    stree = ShadowDecTree.get_shadow_tree(tree_model, X_train, None,
                                          feature_names, None, None, tree_index)
    return DTreeVizAPI(stree).node_stats(node_id)
def explain_prediction_path(tree_model,
                            x: np.ndarray,
                            X_train=None,
                            y_train=None,  # required for XGBoost
                            explanation_type: ('plain_english', 'sklearn_default') = "plain_english",
                            feature_names: List[str] = None,
                            target_name: str = None,
                            class_names: (Mapping[Number, str], List[str]) = None,  # required if classifier,
                            tree_index: int = None,  # required in case of tree ensemble
                            ):
    """Prediction path interpretation for a data instance.

    Deprecated since 2.0; use dtreeviz.model(...).explain_prediction_path() or
    dtreeviz.model(...).instance_feature_importance() instead.

    In case explanation_type = 'plain_english', there will be created a range of values for each feature, based on data
    instance values and its tree prediction path.

    A possible output for this method could be :
        1.5 <= Pclass
        3.5 <= Age < 44.5
        7.91 <= Fare < 54.25
        0.5 <= Sex_label
        Cabin_label < 3.5
        0.5 <= Embarked_label

    :param tree_model: tree.DecisionTreeRegressor, tree.DecisionTreeClassifier, xgboost.core.Booster,
                dtreeviz.models.sklearn_decision_trees.ShadowSKDTree,
                dtreeviz.models.xgb_decision_trees.ShadowXGBDTree
        The tree model or dtreeviz shadow tree model to interpret
    :param x: np.ndarray
        The data instance for which we want to investigate prediction path
    :param X_train: pd.DataFrame, np.ndarray
        The dataset used to build the shadow tree (required when tree_model is not already a shadow tree)
    :param y_train: pd.Series, np.ndarray
        Target variable values
    :param explanation_type: plain_english, sklearn_default
        Specify the interpretation type
    :param feature_names: List[str], optional
        The list of feature variable's name
    :param target_name: str, optional
        The name of target variable
    :param class_names: Mapping[Number, str], List[str], optional
        The list of class names. Required only for classifier
    :param tree_index: int, optional
        Required in case of tree ensemble. Specify the tree index to interpret.
    :return: the plain-english explanation for explanation_type 'plain_english';
        None for 'sklearn_default' (that branch only displays feature importances)
    """
    shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train, feature_names, target_name, class_names,
                                                tree_index)
    model = DTreeVizAPI(shadow_tree)
    if explanation_type == "sklearn_default":
        warnings.warn(
            "explain_prediction_path(explanation_type='sklearn_default') function is deprecated starting from version 2.0. \n "
            "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.instance_feature_importance()",
            DeprecationWarning, stacklevel=2)
        model.instance_feature_importance(x)
    else:
        warnings.warn("explain_prediction_path() function is deprecated starting from version 2.0. \n "
                      "For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.explain_prediction_path()",
                      DeprecationWarning, stacklevel=2)
        return model.explain_prediction_path(x)
================================================
FILE: dtreeviz/interpretation.py
================================================
"""
Prediction path interpretation for decision tree models.
In this moment, it contains "plain english" implementation, but others can be added in the future.
"""
from collections import defaultdict
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from dtreeviz.colors import adjust_colors
from dtreeviz.models.shadow_decision_tree import ShadowDecTree
from dtreeviz.utils import _format_axes
def explain_prediction_plain_english(shadow_tree: ShadowDecTree,
                                     x: (pd.core.series.Series, np.ndarray)) -> str:
    """
    Describe the prediction path for instance x as per-feature value ranges.

    Example output:
        1.5 <= Pclass
        3.5 <= Age < 44.5
        7.91 <= Fare < 54.25
        0.5 <= Sex_label
        Cabin_label < 3.5
        0.5 <= Embarked_label

    Reading the output: the model made this prediction because the instance's
    Pclass value is greater than or equal to 1.5, Age is between 3.5 and 44.5,
    Fare is between 7.91 and 54.25, and so on. Categorical splits are reported
    as "feature in {...}" / "feature not in {...}" sets instead of ranges.

    :param shadow_tree: tree used to make prediction
    :param x: Instance example to make prediction
    :return: str
        Prediction path explanation in plain english.
    """
    feature_of_node = shadow_tree.get_features()
    feature_names = shadow_tree.feature_names
    threshold_of_node = shadow_tree.get_thresholds()
    path = shadow_tree.predict_path(x)

    # Bounds collected per feature while walking the decision path.
    lower_bounds = defaultdict(list)      # splits satisfied as: split <= x[feature]
    upper_bounds = defaultdict(list)      # splits satisfied as: x[feature] < split
    in_categories = defaultdict(set)      # categorical value sets the instance matched
    not_in_categories = defaultdict(set)  # categorical value sets the instance missed

    for node in path[:-1]:  # the last entry is the leaf; it carries no split
        nid = node.id
        name = feature_names[feature_of_node[nid]]
        value = x[feature_of_node[nid]]
        if shadow_tree.is_categorical_split(nid):
            categories = threshold_of_node[nid]
            if value in categories:
                in_categories[name].update(categories)
            else:
                not_in_categories[name].update(categories)
        else:
            split = round(threshold_of_node[nid], 2)
            if split <= value:
                lower_bounds[name].append(split)
            elif split > value:
                upper_bounds[name].append(split)

    # Render numeric ranges in feature-declaration order; the tightest bounds
    # are the max of the lower bounds and the min of the upper bounds.
    output = ""
    for name in feature_names:
        feature_range = ""
        if name in lower_bounds:
            feature_range = f"{max(lower_bounds[name])} <= {name} "
        if name in upper_bounds:
            if feature_range == "":
                feature_range = f"{name} < {min(upper_bounds[name])}"
            else:
                feature_range += f" < {min(upper_bounds[name])}"
        if feature_range != "":
            output += feature_range + "\n"

    # Render categorical membership lines last, one per feature seen.
    for name in set(list(in_categories.keys()) + list(not_in_categories.keys())):
        output += f"{name}{' in ' + str(in_categories[name]) if name in in_categories else ''}" \
                  f"{' not in ' + str(not_in_categories[name]) if name in not_in_categories else ''} \n"
    return output
def explain_prediction_sklearn_default(shadow_tree: ShadowDecTree,
                                       x: (pd.core.series.Series, np.ndarray),
                                       colors: dict = None,
                                       fontsize: int = 10,
                                       fontname: str = "Arial",
                                       grid: bool = False,
                                       figsize: tuple = None,
                                       ax=None,
                                       return_summary: bool = False):
    """
    Explain prediction calculating features importance using sklearn default algorithm : mean decrease in impurity
    (or gini importance) mechanism.
    This mechanism can be biased, especially for situations where features vary in their scale of measurement or
    their number of categories.
    For more details, you can read this article : https://explained.ai/rf-importance/index.html
    :param shadow_tree: tree used to make prediction
    :param x: Instance example to make prediction
    :param colors: dict, optional
        The set of colors used for plotting
    :param fontsize: int, optional
        Plot labels fontsize
    :param fontname: str, optional
        Plot labels font name
    :param grid: bool
        True if we want to display the grid lines on the visualization
    :param figsize: optional (width, height) in inches for the entire plot
    :param ax: optional matplotlib "axes" to draw into
    :param return_summary: bool, optional
        If True, also build and return a short plain-text summary of the
        top-ranked features (and of features with zero importance) along
        the prediction path.
    :return: str summary when return_summary is True, otherwise None
    """
    # Importances are computed only over the nodes on x's prediction path,
    # not over the whole tree.
    decision_node_path = shadow_tree.predict_path(x)
    decision_node_path = [node.id for node in decision_node_path]
    feature_path_importance = shadow_tree.get_feature_path_importance(decision_node_path)
    colors = adjust_colors(colors)
    # Create a figure only when the caller did not hand us an axes; remember
    # it so tight_layout()/show() run only for figures this function owns.
    fig = None
    if ax is None:
        if figsize:
            fig, ax = plt.subplots(figsize=figsize)
        else:
            fig, ax = plt.subplots()
    df = pd.DataFrame()
    df['features'] = shadow_tree.feature_names
    df['imp'] = feature_path_importance
    # Ascending sort so that barh() draws the most important feature on top.
    df = df.sort_values('imp', ascending=True)
    barcontainers = ax.barh(y=df['features'],
                            width=df['imp'],
                            color=colors["hist_bar"],
                            lw=.3,
                            align='center',
                            height=1)
    for rect in barcontainers.patches:
        rect.set_linewidth(.5)
        rect.set_edgecolor(colors['rect_edge'])
    _format_axes(ax, "Feature Importance", "Features", colors, fontsize, fontname, grid=grid)
    if fig is not None:
        fig.tight_layout()
        plt.show()
    if return_summary:
        # Re-rank descending for the textual summary (plot used ascending order).
        ranked_df = df.sort_values('imp', ascending=False)
        top_features = ranked_df.head(5)
        summary_lines = []
        if not top_features.empty:
            summary_lines.append("Top contributing features (importance):")
            for idx, row in enumerate(top_features.itertuples(index=False), start=1):
                summary_lines.append(f"{idx}. {row.features}: {row.imp:.4f}")
        # List at most five zero-importance features; count the remainder.
        zero_features = ranked_df[ranked_df['imp'] == 0]['features'].tolist()
        if zero_features:
            zero_list = ", ".join(zero_features[:5])
            more = len(zero_features) - 5
            suffix = f", and {more} more" if more > 0 else ""
            summary_lines.append(f"No contribution from: {zero_list}{suffix}")
        if not summary_lines:
            summary_lines.append("All features have zero importance for this instance.")
        return "\n".join(summary_lines)
================================================
FILE: dtreeviz/models/__init__.py
================================================
================================================
FILE: dtreeviz/models/lightgbm_decision_tree.py
================================================
from collections import defaultdict
from typing import List, Mapping
import numpy as np
import pandas as pd
from lightgbm.basic import Booster
from dtreeviz.models.shadow_decision_tree import ShadowDecTree, VisualisationNotYetSupportedError
from dtreeviz import utils
class ShadowLightGBMTree(ShadowDecTree):
    """
    dtreeviz shadow-tree adapter for a single tree (tree_index) inside a
    LightGBM Booster. Node structure is rebuilt from booster.dump_model(),
    assigning sequential ids in the order nodes are visited (pre-order,
    left before right).
    """
    def __init__(self,
                 booster: Booster,
                 tree_index: int,
                 X_train: (pd.DataFrame, np.ndarray),
                 y_train: (pd.Series, np.ndarray),
                 feature_names: List[str] = None,
                 target_name: str = None,
                 class_names: (List[str], Mapping[int, str]) = None):
        # Validate the requested tree before any expensive work.
        utils.check_tree_index(tree_index, booster.num_trees())
        self.booster = booster
        self.tree_index = tree_index
        # Must be built before super().__init__, which walks the tree nodes.
        self.tree_nodes, self.children_left, self.children_right = self._get_nodes_info()
        self.thresholds = None  # lazy evaluation
        self.features = None  # lazy evaluation
        self.node_to_samples = None
        super().__init__(booster, X_train, y_train, feature_names, target_name, class_names)
    def _get_nodes_info(self):
        """Walk the dump_model() tree dict and return, as parallel lists indexed
        by node id: the raw node dicts, left-child ids and right-child ids
        (-1 for a leaf's children)."""
        tree_nodes = {}
        children_left = {}
        children_right = {}
        node_index = 0
        def _walk_tree(node, node_id):
            nonlocal node_index
            tree_nodes[node_id] = node
            # Leaf nodes in the dump have no "split_index" key.
            if node.get("split_index") is None:
                children_left[node_id] = -1
                children_right[node_id] = -1
                return
            node_index += 1
            children_left[node_id] = node_index
            _walk_tree(node.get("left_child"), node_index)
            node_index += 1
            children_right[node_id] = node_index
            _walk_tree(node.get("right_child"), node_index)
        def _convert_dict_to_list(my_dict):
            # Keys are dense 0..n-1 node ids, so a list conversion is safe.
            my_list = [-1] * len(my_dict)
            for key, value in my_dict.items():
                my_list[key] = value
            return my_list
        tree_dump = self.booster.dump_model()["tree_info"][self.tree_index]
        _walk_tree(tree_dump["tree_structure"], node_index)
        # Drop nested child dicts; child links are kept in the id lists instead.
        for node in tree_nodes.values():
            node.pop("left_child", None)
            node.pop("right_child", None)
        children_left_list = _convert_dict_to_list(children_left)
        children_right_list = _convert_dict_to_list(children_right)
        tree_node_list = _convert_dict_to_list(tree_nodes)
        return tree_node_list, children_left_list, children_right_list
    def is_fit(self) -> bool:
        """A Booster instance is assumed trained; there is no explicit flag."""
        return isinstance(self.booster, Booster)
    def is_classifier(self) -> bool:
        """Decide classifier vs regressor from the objective in the model dump."""
        objective = self.booster.dump_model(num_iteration=1)["objective"]
        if "binary" in objective or "multiclass" in objective:
            return True
        elif objective in ["regression", "regression_l1", "huber", "fair", "poisson", "quantile", "mape", "gamma",
                           "tweedie"]:
            return False
        raise Exception(f"objective {objective} is not yet supported by dtreeviz's lightgbm implementation")
    def is_categorical_split(self, id) -> bool:
        """LightGBM marks categorical splits with decision_type '=='."""
        node = self.tree_nodes[id]
        if 'split_index' in node:
            if node["decision_type"] == "==":
                return True
        return False
    def get_class_weights(self):
        # Not exposed by LightGBM's dump; callers get None.
        pass
    def get_thresholds(self) -> np.ndarray:
        """Per-node split values, cached after first computation.
        Numeric splits are rounded to 2 decimals; categorical splits become the
        list of matched category codes (dump encodes them as 'a||b||c')."""
        if self.thresholds is not None:
            return self.thresholds
        node_thresholds = [-1] * self.nnodes()
        for i in range(self.nnodes()):
            # Only internal nodes carry a threshold; leaves keep -1.
            if self.children_left[i] != -1 and self.children_right[i] != -1:
                if self.is_categorical_split(i):
                    node_thresholds[i] = list(map(int, self.tree_nodes[i]["threshold"].split("||")))
                else:
                    node_thresholds[i] = round(self.tree_nodes[i]["threshold"], 2)
        self.thresholds = np.array(node_thresholds, dtype=object)
        return self.thresholds
    def get_features(self) -> np.ndarray:
        """Per-node split feature index (-1 for leaves), cached after first call."""
        if self.features is not None:
            return self.features
        self.features = [-1] * self.nnodes()
        for i, node in enumerate(self.tree_nodes):
            self.features[i] = node.get("split_feature", -1)
        self.features = np.array(self.features)
        return self.features
    def criterion(self) -> str:
        raise VisualisationNotYetSupportedError("criterion()", "LightGBM")
    def get_class_weight(self):
        return None
    def nclasses(self) -> int:
        # NOTE(review): reads LightGBM's name-mangled private attribute
        # _Booster__num_class — fragile across lightgbm versions; verify on upgrade.
        if self.booster._Booster__num_class == 1:
            return 2
        else:
            return self.booster._Booster__num_class
    def classes(self) -> np.ndarray:
        """Class values observed in y_train; implicitly None for regressors."""
        if self.is_classifier():
            return np.unique(self.y_train)
    def get_node_samples(self):
        """Map node id -> training-sample indexes routed through that node.
        Built by running every training row down the tree; cached after first call."""
        if self.node_to_samples is not None:
            return self.node_to_samples
        node_to_samples = defaultdict(list)
        for i in range(self.X_train.shape[0]):
            path = self.predict_path(self.X_train[i])
            for node in path:
                node_to_samples[node.id].append(i)
        self.node_to_samples = node_to_samples
        return self.node_to_samples
    def get_split_samples(self, id):
        """Indexes (into this node's sample list) that go left vs right at node id."""
        samples = np.array(self.get_node_samples()[id])
        node_X_data = self.X_train[samples, self.get_node_feature(id)]
        split = self.get_node_split(id)
        if self.is_categorical_split(id):
            # A sample goes left when its value matches any category in the split set.
            indices = np.sum([node_X_data == split_value for split_value in self.get_node_split(id)], axis=0)
            left = np.nonzero(indices == 1)[0]
            right = np.nonzero(indices == 0)[0]
        else:
            left = np.nonzero(node_X_data <= split)[0]
            right = np.nonzero(node_X_data > split)[0]
        return left, right
    def get_root_edge_labels(self):
        return ["≤", ">"]
    def get_node_nsamples(self, id):
        # Leaves and internal nodes store their counts under different dump keys.
        if self.children_right[id] == -1 and self.children_left[id] == -1:
            return self.tree_nodes[id]["leaf_count"]
        else:
            return self.tree_nodes[id]["internal_count"]
    def get_children_left(self) -> np.ndarray:
        return np.array(self.children_left, dtype=int)
    def get_children_right(self) -> np.ndarray:
        return np.array(self.children_right, dtype=int)
    def get_node_split(self, id) -> (int, float):
        return self.get_thresholds()[id]
    def get_node_feature(self, id) -> int:
        return self.get_features()[id]
    def get_node_nsamples_by_class(self, id):
        """Per-class sample counts at node id (classifier only, else None)."""
        all_nodes = self.internal + self.leaves
        if self.is_classifier():
            node_value = [node.n_sample_classes() for node in all_nodes if node.id == id]
            return node_value[0]
    def get_prediction(self, id):
        """Node prediction: majority class for classifiers, mean target of the
        node's samples for regressors."""
        all_nodes = self.internal + self.leaves
        if self.is_classifier():
            node_value = [node.n_sample_classes() for node in all_nodes if node.id == id]
            return np.argmax(node_value[0])
        elif not self.is_classifier():
            node_samples = [node.samples() for node in all_nodes if node.id == id][0]
            return np.mean(self.y_train[node_samples])
    def nnodes(self) -> int:
        return len(self.tree_nodes)
    def get_node_criterion(self, id):
        raise VisualisationNotYetSupportedError("get_node_criterion()", "LightGBM")
    def get_feature_path_importance(self, node_list):
        raise VisualisationNotYetSupportedError("get_feature_path_importance()", "LightGBM")
    def get_max_depth(self) -> int:
        # max_depth can be found in lgbm_model.params, but only if the max_depth is specified
        # otherwise the max depth is -1, from lgbm_model.model_to_string() (to double check)
        raise VisualisationNotYetSupportedError("get_max_depth()", "LightGBM")
    def get_score(self) -> float:
        raise VisualisationNotYetSupportedError("get_score()", "LightGBM")
    def get_min_samples_leaf(self) -> (int, float):
        # 20 is LightGBM's documented default for min_data_in_leaf.
        default_value = 20
        return self.booster.params.get("min_data_in_leaf", default_value)
    def shouldGoLeftAtSplit(self, id, x):
        """True when value x routes to the left child at node id."""
        if self.is_categorical_split(id):
            return x in self.get_node_split(id)
        return x <= self.get_node_split(id)
================================================
FILE: dtreeviz/models/shadow_decision_tree.py
================================================
from abc import ABC, abstractmethod
from numbers import Number
from typing import List, Tuple, Mapping
import numpy as np
import pandas as pd
import sklearn
from dtreeviz import utils
class ShadowDecTree(ABC):
"""
This object adapts decision trees constructed by the various libraries such as scikit-learn's and XGBoost's
DecisionTree(Regressor|Classifier) to dtreeviz. As part of the construction process, the samples
considered at decision and leaf nodes are saved as a big dictionary for use by the nodes.
The decision trees for classifiers and regressors from scikit-learn and
XGBoost etc... are built for efficiency, not ease of tree walking. This class
wraps all of that information in an easy-to-use and consistent interface
that hides the details of the various decision tree libraries.
Field leaves is list of shadow leaf nodes.
Field internal is list of shadow non-leaf nodes.
Field root is the shadow tree root.
"""
def __init__(self,
             tree_model,
             X_train: (pd.DataFrame, np.ndarray),
             y_train: (pd.Series, np.ndarray),
             feature_names: List[str] = None,
             target_name: str = None,
             class_names: (List[str], Mapping[int, str]) = None):
    """
    Parameters
    ----------
    :param tree_model: sklearn.tree.DecisionTreeRegressor, sklearn.tree.DecisionTreeClassifier, xgboost.core.Booster
        The decision tree to be interpreted
    :param X_train: pd.DataFrame, np.ndarray
        Features values on which the shadow tree will be build.
    :param y_train: pd.Series, np.ndarray
        Target values on which the shadow tree will be build.
    :param feature_names: List[str]
        Features' names
    :param target_name: str
        Target's name
    :param class_names: List[str], Mapping[int, str]
        Class' names (in case of a classifier)
    """
    self.tree_model = tree_model
    # Fail fast: everything below assumes a trained model (is_fit is
    # implemented by each library-specific subclass).
    if not self.is_fit():
        raise Exception(f"Model {tree_model} is not fit.")
    self.feature_names = feature_names
    self.target_name = target_name
    # Presumably normalizes pandas containers to numpy arrays — see
    # _get_x_data/_get_y_data for the exact conversion.
    self.X_train = ShadowDecTree._get_x_data(X_train)
    self.y_train = ShadowDecTree._get_y_data(y_train)
    # Build the shadow node graph once; nodes reference self for sample data.
    self.root, self.leaves, self.internal = self._get_tree_nodes()
    if self.is_classifier():
        self.class_names = utils._normalize_class_names(class_names, self.nclasses())
@abstractmethod
def is_fit(self) -> bool:
    """Checks if the tree model is already trained (fit). Called from __init__
    before any other introspection."""
    pass
@abstractmethod
def is_classifier(self) -> bool:
    """Checks if the tree model is a classifier (True) or a regressor (False)."""
    pass
@abstractmethod
def get_class_weights(self):
    """Returns the tree model's class weights, or None when the library does
    not expose them."""
    pass
@abstractmethod
def get_thresholds(self) -> np.ndarray:
    """Returns split node/threshold values for tree's nodes, indexed by node id.
    Ex. threshold[i] holds the split value/threshold for the node i.
    """
    pass
@abstractmethod
def get_features(self) -> np.ndarray:
    """Returns feature indexes for tree's nodes, indexed by node id.
    Ex. features[i] holds the feature index to split on at node i.
    """
    pass
@abstractmethod
def criterion(self) -> str:
    """Returns the name of the function used to measure the quality of a split.
    Ex. Gini, entropy, MSE, MAE
    """
    pass
@abstractmethod
def get_class_weight(self):
    """
    TODO - to be compared with get_class_weights; the two accessors look
    redundant and may be merged.
    :return:
    """
    pass
@abstractmethod
def nclasses(self) -> int:
    """Returns the number of classes.
    Ex. 2 for binary classification or 1 for regression.
    """
    pass
@abstractmethod
def classes(self) -> np.ndarray:
    """Returns the tree's class values in case of classification.
    Ex. [0,1] in case of a binary classification.
    """
    pass
@abstractmethod
def get_node_samples(self):
    """Returns dictionary mapping node id to list of training-sample indexes
    considered by the feature/split decision at that node.
    """
    pass
@abstractmethod
def get_split_samples(self, id):
    """Returns the left and right split sample indexes from node id."""
    pass
@abstractmethod
def get_node_nsamples(self, id):
    """Returns the number of training samples reaching node id."""
    pass
@abstractmethod
def get_children_left(self) -> np.ndarray:
    """Returns the node ids of the left child node, indexed by node id.
    Ex. children_left[i] holds the node id of the left child of node i.
    """
    pass
@abstractmethod
def get_children_right(self) -> np.ndarray:
    """Returns the node ids of the right child node, indexed by node id.
    Ex. children_right[i] holds the node id of the right child of node i.
    """
    pass
@abstractmethod
def get_node_split(self, id) -> (int, float):
    """Returns the split value of node id.
    Parameters
    ----------
    id : int
        The node id.
    """
    pass
@abstractmethod
def get_node_feature(self, id) -> int:
    """Returns the index of the feature node id splits on.
    Parameters
    ----------
    id : int
        The node id.
    """
    pass
@abstractmethod
def get_node_nsamples_by_class(self, id):
    """For a classification decision tree, returns the number of samples for
    each class reaching the specified node.
    Parameters
    ----------
    id : int
        The node id.
    """
    pass
@abstractmethod
def get_prediction(self, id):
    """Returns the constant prediction value for node id.
    Parameters
    ----------
    id : int
        The node id.
    """
    pass
@abstractmethod
def nnodes(self) -> int:
    """Returns the number of nodes (internal nodes + leaves) in the tree."""
    pass
@abstractmethod
def get_node_criterion(self, id):
    """Returns the impurity (i.e., the value of the splitting criterion) at node id.
    Parameters
    ----------
    id : int
        The node id.
    """
    pass
@abstractmethod
def get_feature_path_importance(self, node_list):
    """Returns the feature importance for a list of nodes.
    The node feature importance is calculated based on only the nodes from that
    list, not based on the entire set of tree nodes.
    Parameters
    ----------
    node_list : List
        The list of nodes.
    """
    pass
@abstractmethod
def get_max_depth(self) -> int:
    """Returns the max depth of the tree."""
    pass
@abstractmethod
def get_score(self) -> float:
    """
    For a classifier, returns the mean accuracy on the training data.
    For a regressor, returns the R^2 on the training data.
    """
    pass
@abstractmethod
def get_min_samples_leaf(self) -> (int, float):
    """Returns the minimum number of samples required to be at a leaf node,
    during node splitting."""
    pass
@abstractmethod
def shouldGoLeftAtSplit(self, id, x):
    """Return True if prediction should descend to the left child of node id,
    based on the node's split criterion and the value x."""
    pass
def get_root_edge_labels(self):
    """Edge labels for the root split (e.g. ['≤', '>']); default None —
    subclasses with a fixed split convention override this."""
    pass
def is_categorical_split(self, id) -> bool:
    """Checks if the node split is a categorical one.
    This method needs to be overloaded only for shadow tree implementations
    which contain categorical splits, like Spark; the default is numeric.
    """
    return False
def get_split_node_heights(self, X_train, y_train, nbins) -> Mapping[int, int]:
    """For each internal node, compute the height of the tallest stacked
    histogram bin (summed over classes) of the node's samples for its split
    feature, binning over the feature's overall range with nbins bins.
    Returns a dict node id -> max bin height."""
    class_values = np.unique(y_train)
    node_heights = {}
    for node in self.internal:
        # print(f"node feature {node.feature_name()}, id {node.id}")
        X_feature = X_train[:, node.feature()]
        if node.is_categorical_split():
            # Categorical features are binned over their category-code range.
            overall_feature_range = (0, len(np.unique(X_train[:, node.feature()])) - 1)
        else:
            overall_feature_range = (np.min(X_feature), np.max(X_feature))
        # Bin edges span the whole feature range so all nodes share a scale.
        bins = np.linspace(overall_feature_range[0],
                           overall_feature_range[1], nbins + 1)
        X, y = X_feature[node.samples()], y_train[node.samples()]
        # in case there is a categorical split node, we can convert the values to numbers because we need them
        # only for getting the distribution values
        if node.is_categorical_split():
            X = pd.Series(X).astype("category").cat.codes
        X_hist = [X[y == cl] for cl in class_values]
        height_of_bins = np.zeros(nbins)
        # Stack per-class histograms; the node height is the tallest stacked bin.
        for i, _ in enumerate(class_values):
            hist, foo = np.histogram(X_hist[i], bins=bins, range=overall_feature_range)
            height_of_bins += hist
        node_heights[node.id] = np.max(height_of_bins)
        # print(f"\tmax={np.max(height_of_bins):2.0f}, heights={list(height_of_bins)}, {len(height_of_bins)} bins")
    return node_heights
def predict(self, x: np.ndarray) -> Number:
    """
    Run the feature vector x down the tree and return the leaf's prediction.

    Starting at the root, each internal node compares the relevant entry of x
    against its split (via shouldGoLeftAtSplit) to choose a child, until a
    leaf is reached; the leaf's constant prediction is returned.
    :param
    x: np.ndarray
        Feature vector to run down the tree to a leaf.
    """
    # Iterative descent — one tree level per loop step, equivalent to the
    # classic recursive walk.
    node = self.root
    while not node.isleaf():
        if self.shouldGoLeftAtSplit(node.id, x[node.feature()]):
            node = node.left
        else:
            node = node.right
    return node.prediction()
def predict_path(self, x: np.ndarray) -> List:
    """
    Return the list of nodes visited (root through leaf, inclusive) when
    running feature vector x down the tree.

    Each internal node routes x to a child via shouldGoLeftAtSplit; every
    node touched along the way is recorded in order.
    :param
    x: np.ndarray
        Feature vector to run down the tree to a leaf.
    """
    visited = []
    node = self.root
    while True:
        visited.append(node)
        if node.isleaf():
            break
        # Pick the child dictated by this node's split and x's feature value.
        if self.shouldGoLeftAtSplit(node.id, x[node.feature()]):
            node = node.left
        else:
            node = node.right
    return visited
def get_leaf_sample_counts(self, min_samples=0, max_samples=None):
"""
Get the number of samples for each leaf.
There is the option to filter the leaves w
gitextract_7mrp8x7l/
├── .gitignore
├── LICENSE
├── README.md
├── data/
│ ├── cars.csv
│ └── titanic/
│ └── titanic.csv
├── developer-cert-of-origin.txt
├── dtreeviz/
│ ├── __init__.py
│ ├── ai_explanation.py
│ ├── classifiers.py
│ ├── colors.py
│ ├── compatibility.py
│ ├── interpretation.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── lightgbm_decision_tree.py
│ │ ├── shadow_decision_tree.py
│ │ ├── sklearn_decision_trees.py
│ │ ├── spark_decision_tree.py
│ │ ├── tensorflow_decision_tree.py
│ │ └── xgb_decision_tree.py
│ ├── trees.py
│ ├── utils.py
│ └── version.py
├── notebooks/
│ ├── classifier-boundary-animations.ipynb
│ ├── classifier-decision-boundaries.ipynb
│ ├── colors.ipynb
│ ├── dtreeviz_lightgbm_visualisations.ipynb
│ ├── dtreeviz_sklearn_AI_visualisations.ipynb
│ ├── dtreeviz_sklearn_pipeline_visualisations.ipynb
│ ├── dtreeviz_sklearn_visualisations.ipynb
│ ├── dtreeviz_spark_visualisations.ipynb
│ ├── dtreeviz_tensorflow_visualisations.ipynb
│ └── dtreeviz_xgboost_visualisations.ipynb
├── play.ipynb
├── releasing.txt
├── scripts/
│ └── github_release_notes.py
├── setup.cfg
├── setup.py
├── talk/
│ └── dtreeviz.pptx
└── testing/
├── __init__.py
├── animate_rtree_bivar_3D.py
├── bin/
│ ├── icons.sh
│ └── topng.sh
├── cancer.py
├── data/
│ ├── cars.csv
│ ├── forestfires.csv
│ ├── knowledge.csv
│ └── sweetrs.csv
├── gen_feature_space_samples.py
├── gen_samples.py
├── iris.py
├── issues_investigations.ipynb
├── paper_examples.py
├── play_ctree.py
├── play_ctree_bivar.py
├── play_lightgbm.py
├── play_rtree.py
├── play_rtree_bivar_3D.py
├── play_rtree_bivar_heatmap.py
├── play_spark.py
├── playground.ipynb
├── slides.ipynb
├── testlib/
│ ├── __init__.py
│ └── models/
│ ├── __init__.py
│ ├── conftest.py
│ ├── fixtures/
│ │ ├── dataset.csv
│ │ ├── dataset_lightgbm.csv
│ │ ├── dataset_spark_tf.csv
│ │ ├── lightgbm_model_classifier.txt
│ │ ├── sk_decision_tree_classifier.joblib
│ │ ├── spark_2_decision_tree_classifier.model/
│ │ │ ├── data/
│ │ │ │ ├── part-00000-d3b57c8e-2190-44de-a427-3f57f84c7b67-c000.snappy.parquet
│ │ │ │ └── part-00001-d3b57c8e-2190-44de-a427-3f57f84c7b67-c000.snappy.parquet
│ │ │ └── metadata/
│ │ │ └── part-00000
│ │ ├── spark_3_0_decision_tree_classifier.model/
│ │ │ ├── data/
│ │ │ │ ├── part-00000-65d1fe70-5c43-4fd5-b250-7020b561625a-c000.snappy.parquet
│ │ │ │ └── part-00001-65d1fe70-5c43-4fd5-b250-7020b561625a-c000.snappy.parquet
│ │ │ └── metadata/
│ │ │ └── part-00000
│ │ ├── xgb_model_classifier.joblib
│ │ └── xgb_model_regressor.joblib
│ ├── test_decision_tree_lightgbm_classifier.py
│ ├── test_decision_tree_spark_classifier.py
│ ├── test_decision_tree_tensorflow_classifier.py
│ ├── test_decision_tree_xgb_regressor.py
│ ├── test_decision_trees_sk_classifier.py
│ ├── test_decision_trees_sk_pipeline.py
│ └── test_decision_trees_xgb_classifier.py
├── testone.py
├── tf-catvars.py
├── tf_catvars2.py
├── tf_catvars3.py
└── tf_regr_catvars.py
SYMBOL INDEX (450 symbols across 30 files)
FILE: dtreeviz/ai_explanation.py
function _convert_to_json_serializable (line 60) | def _convert_to_json_serializable(obj):
function get_completion (line 91) | def get_completion(prompt, model=None):
function _get_library (line 105) | def _get_library(tree: ShadowDecTree):
function _get_tree_structure_knowledge (line 111) | def _get_tree_structure_knowledge(tree: ShadowDecTree):
function _get_training_set_knowledge (line 128) | def _get_training_set_knowledge(tree: ShadowDecTree):
function _get_leaf_nodes_knowledge (line 146) | def _get_leaf_nodes_knowledge(tree: ShadowDecTree):
function _get_internal_nodes_knowledge (line 185) | def _get_internal_nodes_knowledge(tree: ShadowDecTree):
function _get_session_history (line 218) | def _get_session_history(session_id: str, max_messages: int = None) -> I...
function setup_chat (line 254) | def setup_chat(tree: ShadowDecTree, session_id: str = "default", model: ...
function build_node_stats_prompt (line 348) | def build_node_stats_prompt(shadow_tree: ShadowDecTree, node_id: int) ->...
FILE: dtreeviz/classifiers.py
function decision_boundaries (line 16) | def decision_boundaries(model, X: np.ndarray, y: np.ndarray,
function decision_boundaries_bivar (line 134) | def decision_boundaries_bivar(model, X:np.ndarray, y:np.ndarray,
function _compute_tiling (line 237) | def _compute_tiling(model, X:np.ndarray, y:np.ndarray, binary_threshold,
function _get_grid_colors (line 305) | def _get_grid_colors(grid_proba, grid_pred, class_values, colors):
function _draw_tiles (line 335) | def _draw_tiles(ax, grid_points, facecolors, tile_alpha, h, w):
function _draw_boundary_edges (line 346) | def _draw_boundary_edges(ax, grid_points, grid_pred_as_matrix, boundary_...
function decision_boundaries_univar (line 374) | def decision_boundaries_univar(model, x: np.ndarray, y: np.ndarray,
function _predict_proba (line 501) | def _predict_proba(model, X):
FILE: dtreeviz/colors.py
function get_hex_colors (line 41) | def get_hex_colors(n_classes, cmap_name="RdYlBu"):
function adjust_colors (line 111) | def adjust_colors(colors, n_classes=None, cmp="RdYlBu"):
FILE: dtreeviz/compatibility.py
function _warning_on_one_line (line 15) | def _warning_on_one_line(message, category, filename, lineno, file=None,...
function rtreeviz_univar (line 21) | def rtreeviz_univar(tree_model,
function rtreeviz_bivar_heatmap (line 48) | def rtreeviz_bivar_heatmap(tree_model,
function rtreeviz_bivar_3D (line 76) | def rtreeviz_bivar_3D(tree_model,
function ctreeviz_univar (line 106) | def ctreeviz_univar(tree_model,
function ctreeviz_bivar (line 129) | def ctreeviz_bivar(tree_model,
function dtreeviz (line 160) | def dtreeviz(tree_model,
function viz_leaf_samples (line 261) | def viz_leaf_samples(tree_model,
function viz_leaf_criterion (line 354) | def viz_leaf_criterion(tree_model,
function ctreeviz_leaf_samples (line 421) | def ctreeviz_leaf_samples(tree_model,
function viz_leaf_target (line 491) | def viz_leaf_target(tree_model,
function describe_node_sample (line 559) | def describe_node_sample(tree_model,
function explain_prediction_path (line 604) | def explain_prediction_path(tree_model,
FILE: dtreeviz/interpretation.py
function explain_prediction_plain_english (line 16) | def explain_prediction_plain_english(shadow_tree: ShadowDecTree,
function explain_prediction_sklearn_default (line 94) | def explain_prediction_sklearn_default(shadow_tree: ShadowDecTree,
FILE: dtreeviz/models/lightgbm_decision_tree.py
class ShadowLightGBMTree (line 12) | class ShadowLightGBMTree(ShadowDecTree):
method __init__ (line 14) | def __init__(self,
method _get_nodes_info (line 33) | def _get_nodes_info(self):
method is_fit (line 74) | def is_fit(self) -> bool:
method is_classifier (line 77) | def is_classifier(self) -> bool:
method is_categorical_split (line 86) | def is_categorical_split(self, id) -> bool:
method get_class_weights (line 93) | def get_class_weights(self):
method get_thresholds (line 96) | def get_thresholds(self) -> np.ndarray:
method get_features (line 111) | def get_features(self) -> np.ndarray:
method criterion (line 122) | def criterion(self) -> str:
method get_class_weight (line 125) | def get_class_weight(self):
method nclasses (line 128) | def nclasses(self) -> int:
method classes (line 134) | def classes(self) -> np.ndarray:
method get_node_samples (line 138) | def get_node_samples(self):
method get_split_samples (line 151) | def get_split_samples(self, id):
method get_root_edge_labels (line 165) | def get_root_edge_labels(self):
method get_node_nsamples (line 168) | def get_node_nsamples(self, id):
method get_children_left (line 174) | def get_children_left(self) -> np.ndarray:
method get_children_right (line 177) | def get_children_right(self) -> np.ndarray:
method get_node_split (line 180) | def get_node_split(self, id) -> (int, float):
method get_node_feature (line 183) | def get_node_feature(self, id) -> int:
method get_node_nsamples_by_class (line 186) | def get_node_nsamples_by_class(self, id):
method get_prediction (line 192) | def get_prediction(self, id):
method nnodes (line 201) | def nnodes(self) -> int:
method get_node_criterion (line 204) | def get_node_criterion(self, id):
method get_feature_path_importance (line 207) | def get_feature_path_importance(self, node_list):
method get_max_depth (line 210) | def get_max_depth(self) -> int:
method get_score (line 215) | def get_score(self) -> float:
method get_min_samples_leaf (line 218) | def get_min_samples_leaf(self) -> (int, float):
method shouldGoLeftAtSplit (line 222) | def shouldGoLeftAtSplit(self, id, x):
FILE: dtreeviz/models/shadow_decision_tree.py
class ShadowDecTree (line 12) | class ShadowDecTree(ABC):
method __init__ (line 28) | def __init__(self,
method is_fit (line 66) | def is_fit(self) -> bool:
method is_classifier (line 71) | def is_classifier(self) -> bool:
method get_class_weights (line 76) | def get_class_weights(self):
method get_thresholds (line 81) | def get_thresholds(self) -> np.ndarray:
method get_features (line 89) | def get_features(self) -> np.ndarray:
method criterion (line 97) | def criterion(self) -> str:
method get_class_weight (line 105) | def get_class_weight(self):
method nclasses (line 113) | def nclasses(self) -> int:
method classes (line 121) | def classes(self) -> np.ndarray:
method get_node_samples (line 129) | def get_node_samples(self):
method get_split_samples (line 136) | def get_split_samples(self, id):
method get_node_nsamples (line 141) | def get_node_nsamples(self, id):
method get_children_left (line 146) | def get_children_left(self) -> np.ndarray:
method get_children_right (line 154) | def get_children_right(self) -> np.ndarray:
method get_node_split (line 162) | def get_node_split(self, id) -> (int, float):
method get_node_feature (line 173) | def get_node_feature(self, id) -> int:
method get_node_nsamples_by_class (line 184) | def get_node_nsamples_by_class(self, id):
method get_prediction (line 195) | def get_prediction(self, id):
method nnodes (line 206) | def nnodes(self) -> int:
method get_node_criterion (line 211) | def get_node_criterion(self, id):
method get_feature_path_importance (line 222) | def get_feature_path_importance(self, node_list):
method get_max_depth (line 235) | def get_max_depth(self) -> int:
method get_score (line 240) | def get_score(self) -> float:
method get_min_samples_leaf (line 248) | def get_min_samples_leaf(self) -> (int, float):
method shouldGoLeftAtSplit (line 253) | def shouldGoLeftAtSplit(self, id, x):
method get_root_edge_labels (line 257) | def get_root_edge_labels(self):
method is_categorical_split (line 260) | def is_categorical_split(self, id) -> bool:
method get_split_node_heights (line 268) | def get_split_node_heights(self, X_train, y_train, nbins) -> Mapping[i...
method predict (line 297) | def predict(self, x: np.ndarray) -> Number:
method predict_path (line 319) | def predict_path(self, x: np.ndarray) -> List:
method get_leaf_sample_counts (line 343) | def get_leaf_sample_counts(self, min_samples=0, max_samples=None):
method get_leaf_criterion (line 365) | def get_leaf_criterion(self):
method get_leaf_sample_counts_by_class (line 375) | def get_leaf_sample_counts_by_class(self):
method _get_tree_nodes (line 385) | def _get_tree_nodes(self):
method _get_x_data (line 409) | def _get_x_data(X_train):
method _get_y_data (line 415) | def _get_y_data(y_train):
method get_shadow_tree (line 421) | def get_shadow_tree(tree_model, X_train, y_train, feature_names, targe...
class ShadowDecTreeNode (line 474) | class ShadowDecTreeNode():
method __init__ (line 481) | def __init__(self, shadow_tree: ShadowDecTree, id: int, left=None, rig...
method split (line 488) | def split(self) -> (int, float):
method feature (line 492) | def feature(self) -> int:
method feature_name (line 496) | def feature_name(self) -> (str, None):
method samples (line 502) | def samples(self) -> List[int]:
method nsamples (line 506) | def nsamples(self) -> int:
method n_sample_classes (line 514) | def n_sample_classes(self):
method criterion (line 533) | def criterion(self):
method split_samples (line 536) | def split_samples(self) -> Tuple[np.ndarray, np.ndarray]:
method isleaf (line 540) | def isleaf(self) -> bool:
method isclassifier (line 543) | def isclassifier(self) -> bool:
method is_categorical_split (line 546) | def is_categorical_split(self) -> bool:
method prediction (line 549) | def prediction(self) -> (Number, None):
method prediction_name (line 558) | def prediction_name(self) -> (str, None):
method class_counts (line 573) | def class_counts(self) -> (List[int], None):
method __str__ (line 586) | def __str__(self):
class VisualisationNotYetSupportedError (line 596) | class VisualisationNotYetSupportedError(Exception):
method __init__ (line 597) | def __init__(self, method_name, model_name):
FILE: dtreeviz/models/sklearn_decision_trees.py
class ShadowSKDTree (line 11) | class ShadowSKDTree(ShadowDecTree):
method __init__ (line 12) | def __init__(self, tree_model,
method is_fit (line 22) | def is_fit(self):
method is_classifier (line 25) | def is_classifier(self):
method get_class_weights (line 28) | def get_class_weights(self):
method get_thresholds (line 33) | def get_thresholds(self):
method get_features (line 36) | def get_features(self):
method criterion (line 39) | def criterion(self):
method get_class_weight (line 42) | def get_class_weight(self):
method nclasses (line 45) | def nclasses(self):
method classes (line 48) | def classes(self):
method get_node_samples (line 52) | def get_node_samples(self):
method get_split_samples (line 68) | def get_split_samples(self, id):
method get_root_edge_labels (line 78) | def get_root_edge_labels(self):
method get_node_nsamples (line 81) | def get_node_nsamples(self, id):
method get_children_left (line 84) | def get_children_left(self):
method get_children_right (line 87) | def get_children_right(self):
method get_node_split (line 90) | def get_node_split(self, id) -> (int, float):
method get_node_feature (line 93) | def get_node_feature(self, id) -> int:
method get_node_nsamples_by_class (line 96) | def get_node_nsamples_by_class(self, id):
method get_prediction (line 111) | def get_prediction(self, id):
method nnodes (line 118) | def nnodes(self):
method get_node_criterion (line 121) | def get_node_criterion(self, id):
method get_feature_path_importance (line 124) | def get_feature_path_importance(self, node_list):
method get_max_depth (line 140) | def get_max_depth(self):
method get_score (line 143) | def get_score(self):
method get_min_samples_leaf (line 146) | def get_min_samples_leaf(self):
method shouldGoLeftAtSplit (line 149) | def shouldGoLeftAtSplit(self, id, x):
FILE: dtreeviz/models/spark_decision_tree.py
class ShadowSparkTree (line 15) | class ShadowSparkTree(ShadowDecTree):
method __init__ (line 17) | def __init__(self, tree_model: (DecisionTreeClassificationModel, Decis...
method _get_nodes_info (line 30) | def _get_nodes_info(self, tree_model):
method is_fit (line 53) | def is_fit(self) -> bool:
method is_classifier (line 58) | def is_classifier(self) -> bool:
method is_categorical_split (line 61) | def is_categorical_split(self, id) -> bool:
method get_class_weights (line 68) | def get_class_weights(self):
method get_class_weight (line 71) | def get_class_weight(self):
method get_thresholds (line 74) | def get_thresholds(self) -> np.ndarray:
method get_features (line 91) | def get_features(self) -> np.ndarray:
method criterion (line 102) | def criterion(self) -> str:
method nclasses (line 105) | def nclasses(self) -> int:
method classes (line 115) | def classes(self) -> np.ndarray:
method get_node_samples (line 119) | def get_node_samples(self):
method get_split_samples (line 134) | def get_split_samples(self, id):
method get_root_edge_labels (line 148) | def get_root_edge_labels(self):
method get_node_nsamples (line 151) | def get_node_nsamples(self, id):
method get_children_left (line 162) | def get_children_left(self) -> np.ndarray:
method get_children_right (line 165) | def get_children_right(self):
method get_node_split (line 168) | def get_node_split(self, id) -> (int, float, list):
method get_node_feature (line 171) | def get_node_feature(self, id) -> int:
method get_node_nsamples_by_class (line 174) | def get_node_nsamples_by_class(self, id):
method get_prediction (line 193) | def get_prediction(self, id):
method nnodes (line 196) | def nnodes(self) -> int:
method get_node_criterion (line 199) | def get_node_criterion(self, id):
method get_feature_path_importance (line 202) | def get_feature_path_importance(self, node_list):
method get_max_depth (line 205) | def get_max_depth(self) -> int:
method get_score (line 208) | def get_score(self) -> float:
method get_min_samples_leaf (line 211) | def get_min_samples_leaf(self) -> (int, float):
method shouldGoLeftAtSplit (line 214) | def shouldGoLeftAtSplit(self, id, x):
method _get_pyspark_major_version (line 220) | def _get_pyspark_major_version():
method _get_tree_model_parameter_value (line 223) | def _get_tree_model_parameter_value(self, name):
FILE: dtreeviz/models/tensorflow_decision_tree.py
class ShadowTensorflowTree (line 13) | class ShadowTensorflowTree(ShadowDecTree):
method __init__ (line 18) | def __init__(self, model: RandomForestModel,
method _get_column_dataspec (line 39) | def _get_column_dataspec(self):
method is_fit (line 45) | def is_fit(self) -> bool:
method get_children_left (line 52) | def get_children_left(self):
method get_children_right (line 55) | def get_children_right(self):
method is_classifier (line 58) | def is_classifier(self) -> bool:
method get_class_weights (line 64) | def get_class_weights(self):
method get_thresholds (line 67) | def get_thresholds(self) -> np.ndarray:
method get_features (line 88) | def get_features(self) -> np.ndarray:
method criterion (line 101) | def criterion(self) -> str:
method get_class_weight (line 105) | def get_class_weight(self):
method nclasses (line 108) | def nclasses(self) -> int:
method classes (line 115) | def classes(self) -> np.ndarray:
method get_node_samples (line 119) | def get_node_samples(self):
method get_split_samples (line 132) | def get_split_samples(self, id):
method get_node_nsamples (line 146) | def get_node_nsamples(self, id):
method get_node_split (line 149) | def get_node_split(self, id) -> (int, float):
method get_node_feature (line 152) | def get_node_feature(self, id) -> int:
method get_node_nsamples_by_class (line 156) | def get_node_nsamples_by_class(self, id):
method get_prediction (line 162) | def get_prediction(self, id):
method is_categorical_split (line 172) | def is_categorical_split(self, id) -> bool:
method nnodes (line 179) | def nnodes(self) -> int:
method get_node_criterion (line 182) | def get_node_criterion(self, id):
method get_feature_path_importance (line 185) | def get_feature_path_importance(self, node_list):
method get_max_depth (line 188) | def get_max_depth(self) -> int:
method get_score (line 191) | def get_score(self) -> float:
method get_min_samples_leaf (line 194) | def get_min_samples_leaf(self) -> (int, float):
method shouldGoLeftAtSplit (line 197) | def shouldGoLeftAtSplit(self, id, x):
method get_root_edge_labels (line 202) | def get_root_edge_labels(self):
method _get_nodes_info (line 205) | def _get_nodes_info(self):
FILE: dtreeviz/models/xgb_decision_tree.py
class ShadowXGBDTree (line 16) | class ShadowXGBDTree(ShadowDecTree):
method __init__ (line 25) | def __init__(self, booster: Booster,
method is_fit (line 47) | def is_fit(self):
method get_class_weights (line 51) | def get_class_weights(self):
method get_class_weight (line 55) | def get_class_weight(self):
method criterion (line 58) | def criterion(self):
method get_children_left (line 61) | def get_children_left(self):
method get_children_right (line 64) | def get_children_right(self):
method get_node_split (line 67) | def get_node_split(self, id) -> (float):
method get_node_feature (line 75) | def get_node_feature(self, id) -> int:
method get_features (line 82) | def get_features(self):
method get_node_samples (line 90) | def get_node_samples(self):
method get_split_samples (line 114) | def get_split_samples(self, id):
method get_root_edge_labels (line 124) | def get_root_edge_labels(self):
method get_node_nsamples (line 127) | def get_node_nsamples(self, id):
method _get_leaf_prediction_path (line 130) | def _get_leaf_prediction_path(self, leaf):
method _get_tree_dataframe (line 157) | def _get_tree_dataframe(self):
method _get_column_value (line 160) | def _get_column_value(self, column_name):
method _get_nodes_values (line 163) | def _get_nodes_values(self, column_name):
method _split_column_value (line 169) | def _split_column_value(self, column_name):
method _change_no_children_value (line 178) | def _change_no_children_value(self, children):
method _calculate_children (line 181) | def _calculate_children(self, column_name):
method get_feature_path_importance (line 187) | def get_feature_path_importance(self, node_list):
method get_node_criterion (line 190) | def get_node_criterion(self):
method get_thresholds (line 194) | def get_thresholds(self):
method get_node_nsamples_by_class (line 199) | def get_node_nsamples_by_class(self, id):
method get_prediction (line 205) | def get_prediction(self, id):
method is_classifier (line 214) | def is_classifier(self):
method nnodes (line 222) | def nnodes(self):
method nclasses (line 225) | def nclasses(self):
method classes (line 231) | def classes(self):
method get_max_depth (line 235) | def get_max_depth(self):
method get_score (line 238) | def get_score(self):
method get_min_samples_leaf (line 241) | def get_min_samples_leaf(self):
method shouldGoLeftAtSplit (line 244) | def shouldGoLeftAtSplit(self, id, x):
FILE: dtreeviz/trees.py
class DTreeVizAPI (line 33) | class DTreeVizAPI:
method __init__ (line 41) | def __init__(self, shadow_tree: ShadowDecTree, ai_chat: bool = False, ...
method _require_ai_explanation (line 54) | def _require_ai_explanation():
method chat (line 68) | def chat(self, question, stream=True):
method _chat_invoke (line 84) | def _chat_invoke(self, question):
method _chat_stream (line 111) | def _chat_stream(self, question):
method leaf_sizes (line 240) | def leaf_sizes(self,
method ctree_leaf_distributions (line 344) | def ctree_leaf_distributions(self,
method view (line 486) | def view(self,
method leaf_purity (line 1061) | def leaf_purity(self,
method node_stats (line 1176) | def node_stats(self, node_id: int, ai_chat: Optional[bool] = None) -> ...
method instance_feature_importance (line 1223) | def instance_feature_importance(self, x,
method explain_prediction_path (line 1279) | def explain_prediction_path(self, x: np.ndarray) -> str:
method rtree_leaf_distributions (line 1301) | def rtree_leaf_distributions(self,
method ctree_feature_space (line 1364) | def ctree_feature_space(self,
method rtree_feature_space (line 1431) | def rtree_feature_space(self, fontsize: int = 10, ticks_fontsize=8, sh...
method rtree_feature_space3D (line 1487) | def rtree_feature_space3D(self,
function _class_split_viz (line 1550) | def _class_split_viz(node: ShadowDecTreeNode,
function _class_leaf_viz (line 1682) | def _class_leaf_viz(node: ShadowDecTreeNode,
function _regr_split_viz (line 1717) | def _regr_split_viz(node: ShadowDecTreeNode,
function _regr_leaf_viz (line 1819) | def _regr_leaf_viz(node: ShadowDecTreeNode,
function _draw_legend (line 1868) | def _draw_legend(shadow_tree, target_name, filename, colors, fontname):
function _draw_piechart (line 1892) | def _draw_piechart(counts, size, colors, filename, label, fontname, grap...
function _draw_barh_chart (line 1929) | def _draw_barh_chart(counts, size, colors, filename, label, fontname, gr...
function _prop_size (line 1966) | def _prop_size(n, counts, output_range=(0.00, 0.3)):
function _get_num_bins (line 1978) | def _get_num_bins(histtype, n_classes):
function _get_leaf_target_input (line 1985) | def _get_leaf_target_input(shadow_tree: ShadowDecTree, precision: int):
function _ctreeviz_univar (line 2008) | def _ctreeviz_univar(shadow_tree,
function _ctreeviz_bivar (line 2108) | def _ctreeviz_bivar(shadow_tree, fontsize, ticks_fontsize, fontname, show,
function _rtreeviz_univar (line 2168) | def _rtreeviz_univar(shadow_tree, fontsize, ticks_fontsize, fontname, show,
function _rtreeviz_bivar_heatmap (line 2231) | def _rtreeviz_bivar_heatmap(shadow_tree, fontsize, ticks_fontsize, fontn...
function _rtreeviz_bivar_3D (line 2290) | def _rtreeviz_bivar_3D(shadow_tree, fontsize, ticks_fontsize, fontname,
function model (line 2353) | def model(model,
FILE: dtreeviz/utils.py
function criterion_remapping (line 17) | def criterion_remapping(criterion):
function inline_svg_images (line 32) | def inline_svg_images(svg) -> str:
function get_SVG_shape (line 97) | def get_SVG_shape(svg) -> Tuple[Number,Number,Sequence[Number]]:
function scale_SVG (line 112) | def scale_SVG(svg:str, scale:float) -> str:
function myround (line 148) | def myround(v,ndigits=2):
function _extract_final_feature_names (line 152) | def _extract_final_feature_names(pipeline, features):
function _normalize_class_names (line 184) | def _normalize_class_names(class_names, nclasses):
function extract_params_from_pipeline (line 197) | def extract_params_from_pipeline(pipeline, X_train, feature_names):
function check_tree_index (line 226) | def check_tree_index(tree_index, nr_of_trees):
class DTreeVizRender (line 233) | class DTreeVizRender:
method __init__ (line 237) | def __init__(self, dot, scale=1.0):
method _repr_svg_ (line 241) | def _repr_svg_(self):
method svg (line 244) | def svg(self):
method view (line 251) | def view(self):
method show (line 257) | def show(self):
method save_svg (line 262) | def save_svg(self):
method save (line 269) | def save(self, filename):
function add_classifier_legend (line 316) | def add_classifier_legend(ax, class_names, class_values, facecolors, tar...
function _format_axes (line 347) | def _format_axes(ax, xlabel, ylabel, colors, fontsize, fontname, ticks_f...
function _draw_wedge (line 373) | def _draw_wedge(ax, x, node, color, is_classifier, h=None, height_range=...
function _set_wedge_ticks (line 416) | def _set_wedge_ticks(ax, ax_ticks, wedge_ticks, separation=0.1):
function tessellate (line 443) | def tessellate(root, X_train, featidx):
function is_numeric (line 482) | def is_numeric(A:np.ndarray) -> bool:
FILE: testing/cancer.py
function viz_breast_cancer (line 27) | def viz_breast_cancer(orientation="TD",
FILE: testing/gen_feature_space_samples.py
function viz_digits (line 22) | def viz_digits(features, feature_names, max_depth):
function viz_wine (line 48) | def viz_wine(features, feature_names, max_depth):
function viz_knowledge (line 74) | def viz_knowledge(features, feature_names, max_depth):
function viz_diabetes (line 103) | def viz_diabetes(features, feature_names, max_depth):
function viz_boston (line 129) | def viz_boston(features, feature_names, max_depth):
FILE: testing/gen_samples.py
function viz_boston (line 36) | def viz_boston(orientation="TD",
function viz_diabetes (line 74) | def viz_diabetes(orientation="TD",
function viz_sweets (line 107) | def viz_sweets(orientation="TD",
function viz_fires (line 144) | def viz_fires(orientation="TD",
function viz_iris (line 186) | def viz_iris(orientation="TD",
function viz_digits (line 227) | def viz_digits(orientation="TD",
function viz_wine (line 264) | def viz_wine(orientation="TD",
function viz_breast_cancer (line 297) | def viz_breast_cancer(orientation="TD",
function viz_knowledge (line 330) | def viz_knowledge(orientation="TD",
function save (line 367) | def save(name, dirname, orientation, max_depth, fancy=True, pickX=False,...
FILE: testing/iris.py
function viz_iris (line 25) | def viz_iris(orientation="TD",
FILE: testing/paper_examples.py
function viz_boston_one_feature (line 7) | def viz_boston_one_feature(orientation="TD", max_depth=3, random_state=6...
function viz_knowledge_one_feature (line 24) | def viz_knowledge_one_feature(orientation="TD", max_depth=3, random_stat...
FILE: testing/testlib/models/conftest.py
function dataset (line 11) | def dataset() -> pd.DataFrame:
function x_dataset_classifier (line 16) | def x_dataset_classifier() -> pd.DataFrame:
function y_dataset_classifier (line 22) | def y_dataset_classifier() -> pd.Series:
function x_dataset_regressor (line 27) | def x_dataset_regressor() -> pd.DataFrame:
function y_dataset_regressor (line 33) | def y_dataset_regressor(dataset) -> pd.Series:
function dataset_spark_tensorflow (line 38) | def dataset_spark_tensorflow() -> pd.DataFrame:
function dataset_lightgbm (line 42) | def dataset_lightgbm() -> pd.DataFrame:
FILE: testing/testlib/models/test_decision_tree_lightgbm_classifier.py
function lgb_dec_tree (line 8) | def lgb_dec_tree():
function shadow_dec_tree (line 13) | def shadow_dec_tree(lgb_dec_tree, dataset_lightgbm) -> ShadowLightGBMTree:
function test_is_fit (line 19) | def test_is_fit(shadow_dec_tree: ShadowLightGBMTree):
function test_is_classifier (line 23) | def test_is_classifier(shadow_dec_tree: ShadowLightGBMTree):
function test_get_children_left (line 27) | def test_get_children_left(shadow_dec_tree: ShadowLightGBMTree):
function test_get_children_right (line 32) | def test_get_children_right(shadow_dec_tree):
function test_get_node_nsamples (line 37) | def test_get_node_nsamples(shadow_dec_tree):
function test_get_thresholds (line 47) | def test_get_thresholds(shadow_dec_tree: ShadowLightGBMTree):
function test_nnodes (line 58) | def test_nnodes(shadow_dec_tree):
function test_get_features (line 62) | def test_get_features(shadow_dec_tree: ShadowLightGBMTree):
function test_nclasses (line 67) | def test_nclasses(shadow_dec_tree: ShadowLightGBMTree):
function test_classes (line 71) | def test_classes(shadow_dec_tree: ShadowLightGBMTree):
function test_get_node_samples (line 75) | def test_get_node_samples(shadow_dec_tree: ShadowLightGBMTree):
function test_get_split_samples (line 86) | def test_get_split_samples(shadow_dec_tree: ShadowLightGBMTree):
function test_get_min_samples_leaf (line 106) | def test_get_min_samples_leaf(shadow_dec_tree: ShadowLightGBMTree):
FILE: testing/testlib/models/test_decision_tree_spark_classifier.py
function tree_model (line 11) | def tree_model() -> (DecisionTreeClassificationModel):
function spark_dtree (line 25) | def spark_dtree(tree_model, dataset_spark_tensorflow) -> ShadowSparkTree:
function test_is_fit (line 31) | def test_is_fit(spark_dtree):
function test_is_classifier (line 35) | def test_is_classifier(spark_dtree):
function test_get_children_left (line 39) | def test_get_children_left(spark_dtree):
function test_get_children_right (line 44) | def test_get_children_right(spark_dtree):
function test_get_node_nsamples (line 49) | def test_get_node_nsamples(spark_dtree):
function test_get_features (line 59) | def test_get_features(spark_dtree):
function test_nclasses (line 65) | def test_nclasses(spark_dtree):
function test_get_node_feature (line 69) | def test_get_node_feature(spark_dtree):
function test_get_node_nsamples_by_class (line 79) | def test_get_node_nsamples_by_class(spark_dtree):
function test_get_prediction (line 89) | def test_get_prediction(spark_dtree):
function test_nnodes (line 101) | def test_nnodes(spark_dtree):
function test_get_max_depth (line 105) | def test_get_max_depth(spark_dtree):
function test_get_min_samples_leaf (line 109) | def test_get_min_samples_leaf(spark_dtree):
function test_get_thresholds (line 113) | def test_get_thresholds(spark_dtree):
function test_predict (line 120) | def test_predict(spark_dtree, dataset_spark_tensorflow):
function test_predict_path (line 134) | def test_predict_path(spark_dtree, dataset_spark_tensorflow):
function test_get_node_samples (line 151) | def test_get_node_samples(spark_dtree):
function test_is_categorical_split (line 160) | def test_is_categorical_split(spark_dtree):
FILE: testing/testlib/models/test_decision_tree_tensorflow_classifier.py
function features_clf (line 9) | def features_clf():
function tf_rf_model (line 14) | def tf_rf_model(dataset_spark_tensorflow):
function tf_shadow_clf (line 30) | def tf_shadow_clf(tf_rf_model, dataset_spark_tensorflow, features_clf):
function test_is_fit (line 44) | def test_is_fit(tf_shadow_clf):
function test_get_children_left (line 48) | def test_get_children_left(tf_shadow_clf):
function test_get_children_right (line 52) | def test_get_children_right(tf_shadow_clf):
function test_nclasses (line 56) | def test_nclasses(tf_shadow_clf):
function test_classes (line 60) | def test_classes(tf_shadow_clf):
function test_get_features (line 64) | def test_get_features(tf_shadow_clf):
function test_get_node_feature (line 68) | def test_get_node_feature(tf_shadow_clf):
function test_get_thresholds (line 74) | def test_get_thresholds(tf_shadow_clf):
function test_get_node_samples (line 78) | def test_get_node_samples(tf_shadow_clf):
function test_get_node_nsamples (line 85) | def test_get_node_nsamples(tf_shadow_clf):
function test_get_node_nsamples_by_class (line 92) | def test_get_node_nsamples_by_class(tf_shadow_clf):
function test_get_prediction (line 98) | def test_get_prediction(tf_shadow_clf):
function test_get_max_depth (line 108) | def test_get_max_depth(tf_shadow_clf):
function test_is_categorical_split (line 112) | def test_is_categorical_split(tf_shadow_clf):
FILE: testing/testlib/models/test_decision_tree_xgb_regressor.py
function xgb_booster (line 10) | def xgb_booster() -> xgb.Booster:
function xgb_tree (line 15) | def xgb_tree(xgb_booster, x_dataset_regressor, y_dataset_regressor) -> S...
function test_x_dataset (line 21) | def test_x_dataset(x_dataset_regressor):
function test_y_dataset (line 27) | def test_y_dataset(y_dataset_regressor):
function test_feature_names (line 33) | def test_feature_names(xgb_booster):
function test_get_prediction (line 37) | def test_get_prediction(xgb_tree):
function test_get_max_depth (line 47) | def test_get_max_depth(xgb_tree):
function test_get_leaf_sample_counts (line 51) | def test_get_leaf_sample_counts(xgb_tree):
FILE: testing/testlib/models/test_decision_trees_sk_classifier.py
function dec_tree (line 10) | def dec_tree() -> (DecisionTreeClassifier, DecisionTreeRegressor):
function shadow_dec_tree (line 15) | def shadow_dec_tree(dec_tree, dataset) -> ShadowSKDTree:
function test_x_dataset (line 22) | def test_x_dataset(x_dataset_classifier):
function test_feature_number (line 28) | def test_feature_number(shadow_dec_tree):
function test_is_fit (line 32) | def test_is_fit(shadow_dec_tree):
function test_is_classifier (line 36) | def test_is_classifier(shadow_dec_tree):
function test_class_weight (line 40) | def test_class_weight(shadow_dec_tree):
function test_criterion (line 44) | def test_criterion(shadow_dec_tree):
function test_nclasses (line 48) | def test_nclasses(shadow_dec_tree):
function test_classes (line 52) | def test_classes(shadow_dec_tree):
function test_get_node_samples (line 57) | def test_get_node_samples(shadow_dec_tree):
function test_get_class_weights (line 66) | def test_get_class_weights(shadow_dec_tree):
function test_get_tree_nodes (line 70) | def test_get_tree_nodes(shadow_dec_tree):
function test_get_children_left (line 75) | def test_get_children_left(shadow_dec_tree):
function test_get_children_right (line 80) | def test_get_children_right(shadow_dec_tree):
function test_get_node_split (line 85) | def test_get_node_split(shadow_dec_tree):
function test_get_node_feature (line 92) | def test_get_node_feature(shadow_dec_tree):
function test_get_max_depth (line 100) | def test_get_max_depth(shadow_dec_tree):
function test_get_score (line 104) | def test_get_score(shadow_dec_tree):
function test_get_min_samples_leaf (line 108) | def test_get_min_samples_leaf(shadow_dec_tree): \
function test_nnodes (line 112) | def test_nnodes(shadow_dec_tree):
function test_get_leaf_sample_counts (line 116) | def test_get_leaf_sample_counts(shadow_dec_tree):
function test_get_thresholds (line 124) | def test_get_thresholds(shadow_dec_tree):
function test_predict (line 129) | def test_predict(shadow_dec_tree, x_dataset_classifier):
function test_predict_path (line 146) | def test_predict_path(shadow_dec_tree, x_dataset_classifier):
function test_get_prediction (line 165) | def test_get_prediction(shadow_dec_tree):
function test_get_node_nsamples_by_class (line 176) | def test_get_node_nsamples_by_class(shadow_dec_tree):
FILE: testing/testlib/models/test_decision_trees_sk_pipeline.py
function dec_tree_pipeline (line 26) | def dec_tree_pipeline(x_dataset_classifier, y_dataset_classifier) -> (Pi...
function shadow_dec_tree (line 37) | def shadow_dec_tree(dec_tree_pipeline, dataset) -> ShadowSKDTree:
function test_extract_feature_names (line 56) | def test_extract_feature_names(dec_tree_pipeline):
function test_feature_number (line 70) | def test_feature_number(shadow_dec_tree):
function test_is_fit (line 74) | def test_is_fit(shadow_dec_tree):
FILE: testing/testlib/models/test_decision_trees_xgb_classifier.py
function xgb_booster (line 10) | def xgb_booster() -> xgb.Booster:
function xgb_tree (line 15) | def xgb_tree(xgb_booster, x_dataset_classifier, y_dataset_classifier) ->...
function test_x_dataset (line 22) | def test_x_dataset(x_dataset_classifier):
function test_y_dataset (line 28) | def test_y_dataset(y_dataset_classifier):
function test_feature_names (line 33) | def test_feature_names(xgb_booster):
function test_get_children_left (line 37) | def test_get_children_left(xgb_tree):
function test_get_children_right (line 42) | def test_get_children_right(xgb_tree):
function test_get_node_feature (line 46) | def test_get_node_feature(xgb_tree):
function test_get_features (line 54) | def test_get_features(xgb_tree):
function test_get_node_samples (line 58) | def test_get_node_samples(xgb_tree):
function test_get_node_nsamples_by_class (line 69) | def test_get_node_nsamples_by_class(xgb_tree):
function test_get_prediction (line 76) | def test_get_prediction(xgb_tree):
function test_nclasses (line 83) | def test_nclasses(xgb_tree):
function test_classes (line 87) | def test_classes(xgb_tree):
function test_get_thresholds (line 91) | def test_get_thresholds(xgb_tree):
function test_is_classifier (line 95) | def test_is_classifier(xgb_tree):
function test_get_leaf_sample_counts (line 99) | def test_get_leaf_sample_counts(xgb_tree):
FILE: testing/testone.py
function viz_iris (line 8) | def viz_iris(orientation="TD", max_depth=5, random_state=666, fancy=True):
function viz_boston (line 42) | def viz_boston(orientation="TD", max_depth=3, random_state=666, fancy=Tr...
function viz_knowledge (line 63) | def viz_knowledge(orientation="TD", max_depth=3, random_state=666, fancy...
function viz_diabetes (line 83) | def viz_diabetes(orientation="TD", max_depth=3, random_state=666, fancy=...
function viz_digits (line 117) | def viz_digits(orientation="TD", max_depth=3, random_state=666, fancy=Tr...
function viz_wine (line 137) | def viz_wine(orientation="TD", max_depth=3, random_state=666, fancy=True...
function weird_binary_case (line 159) | def weird_binary_case():
FILE: testing/tf-catvars.py
function split_dataset (line 15) | def split_dataset(dataset, test_ratio=0.30):
FILE: testing/tf_catvars3.py
function split_dataset (line 10) | def split_dataset(dataset, test_ratio=0.30):
FILE: testing/tf_regr_catvars.py
function split_dataset (line 15) | def split_dataset(dataset, test_ratio=0.30, seed=1234):
Copy disabled (too large)
Download .json
Condensed preview — 89 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (54,590K chars).
[
{
"path": ".gitignore",
"chars": 1210,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "LICENSE",
"chars": 1069,
"preview": "MIT License\n\nCopyright (c) 2021 Terence Parr\n\nPermission is hereby granted, free of charge, to any person obtaining a co"
},
{
"path": "README.md",
"chars": 18606,
"preview": "# dtreeviz : Decision Tree Visualization\n\n## Description\n\nA python library for decision tree visualization and model int"
},
{
"path": "data/cars.csv",
"chars": 5996,
"preview": "MPG,CYL,ENG,WGT\n18,8,307,3504\n15,8,350,3693\n18,8,318,3436\n16,8,304,3433\n17,8,302,3449\n15,8,429,4341\n14,8,454,4354\n14,8,4"
},
{
"path": "data/titanic/titanic.csv",
"chars": 60302,
"preview": "PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked\n1,0,3,\"Braund, Mr. Owen Harris\",male,22,"
},
{
"path": "developer-cert-of-origin.txt",
"chars": 1882,
"preview": "dtreeviz uses the Linux Foundation's Developer\nCertificate of Origin, DCO, version 1.1. See either\nhttps://developercert"
},
{
"path": "dtreeviz/__init__.py",
"chars": 596,
"preview": "from .version import __version__\n\n# NEW API\n# import dtreeviz\n# call m = dtreeviz.model(...) then m.view() etc...\nfrom d"
},
{
"path": "dtreeviz/ai_explanation.py",
"chars": 17380,
"preview": "import datetime\nimport json\nimport os\n\nimport numpy as np\nimport pandas as pd\nfrom langchain_openai import ChatOpenAI\nfr"
},
{
"path": "dtreeviz/classifiers.py",
"chars": 24724,
"preview": "from typing import Tuple\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom PIL import ImageCo"
},
{
"path": "dtreeviz/colors.py",
"chars": 4174,
"preview": "import matplotlib\nimport numpy as np\n\nYELLOW = '#fefecd'\nGREEN = '#cfe2d4'\nDARKBLUE = '#313695'\nBLUE = '#4575b4'\nDARKGRE"
},
{
"path": "dtreeviz/compatibility.py",
"chars": 35372,
"preview": "# Functions to support backward compatibility to pre-2.0 API\nimport warnings\nfrom numbers import Number\nfrom typing impo"
},
{
"path": "dtreeviz/interpretation.py",
"chars": 7577,
"preview": "\"\"\"\nPrediction path interpretation for decision tree models.\nIn this moment, it contains \"plain english\" implementation,"
},
{
"path": "dtreeviz/models/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "dtreeviz/models/lightgbm_decision_tree.py",
"chars": 8271,
"preview": "from collections import defaultdict\nfrom typing import List, Mapping\n\nimport numpy as np\nimport pandas as pd\nfrom lightg"
},
{
"path": "dtreeviz/models/shadow_decision_tree.py",
"chars": 22837,
"preview": "from abc import ABC, abstractmethod\nfrom numbers import Number\nfrom typing import List, Tuple, Mapping\n\nimport numpy as "
},
{
"path": "dtreeviz/models/sklearn_decision_trees.py",
"chars": 5255,
"preview": "from collections import defaultdict\nfrom typing import List, Mapping\n\nimport numpy as np\nfrom sklearn.utils import compu"
},
{
"path": "dtreeviz/models/spark_decision_tree.py",
"chars": 9121,
"preview": "from abc import ABC\nfrom collections import defaultdict\nfrom typing import List, Mapping\n\nimport numpy as np\nimport pysp"
},
{
"path": "dtreeviz/models/tensorflow_decision_tree.py",
"chars": 8886,
"preview": "from collections import defaultdict\nfrom typing import List, Mapping\n\nimport numpy as np\nimport tensorflow_decision_fore"
},
{
"path": "dtreeviz/models/xgb_decision_tree.py",
"chars": 8979,
"preview": "import json\nimport math\nfrom collections import defaultdict\nfrom typing import List, Mapping\n\nimport numpy as np\n\nfrom d"
},
{
"path": "dtreeviz/trees.py",
"chars": 111974,
"preview": "import os\nimport tempfile\nimport warnings\nfrom typing import Mapping, List, Callable, Optional\n\n\nimport matplotlib\nimpor"
},
{
"path": "dtreeviz/utils.py",
"chars": 17511,
"preview": "import os\nimport re\nimport tempfile\nimport warnings\nimport xml.etree.cElementTree as ET\nfrom pathlib import Path\nfrom sy"
},
{
"path": "dtreeviz/version.py",
"chars": 1099,
"preview": "\"\"\"\nMIT License\n\nCopyright (c) 2023 Terence Parr\n\nPermission is hereby granted, free of charge, to any person obtaining "
},
{
"path": "notebooks/classifier-boundary-animations.ipynb",
"chars": 9913,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Animations showing feature space "
},
{
"path": "notebooks/classifier-decision-boundaries.ipynb",
"chars": 5503803,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Feature space and classification "
},
{
"path": "notebooks/dtreeviz_lightgbm_visualisations.ipynb",
"chars": 6882870,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# `dtreeviz` LightGBM Examples\\n\",\n"
},
{
"path": "notebooks/dtreeviz_sklearn_AI_visualisations.ipynb",
"chars": 7584547,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# `dtreeviz` scikit-learn Examples\\"
},
{
"path": "notebooks/dtreeviz_sklearn_pipeline_visualisations.ipynb",
"chars": 1996126,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": [\n "
},
{
"path": "notebooks/dtreeviz_sklearn_visualisations.ipynb",
"chars": 6200070,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# `dtreeviz` scikit-learn Examples\\"
},
{
"path": "notebooks/dtreeviz_spark_visualisations.ipynb",
"chars": 5369571,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# `dtreeviz` Spark Examples\\n\",\n "
},
{
"path": "notebooks/dtreeviz_tensorflow_visualisations.ipynb",
"chars": 5146052,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"dc54a482-1a08-4042-baa7-2ab017b8ce35\",\n \"metadata\": {},\n \"so"
},
{
"path": "notebooks/dtreeviz_xgboost_visualisations.ipynb",
"chars": 4967971,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# `dtreeviz` XGBoost Examples\\n\",\n "
},
{
"path": "play.ipynb",
"chars": 103509,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": [\n "
},
{
"path": "releasing.txt",
"chars": 266,
"preview": "# Releasing dtreeviz\n\n1. Update dtreeviz/version.py, setup.py, README, releasing.txt.\n2. Merge dev into master\n3. Create"
},
{
"path": "scripts/github_release_notes.py",
"chars": 1553,
"preview": "# Get github issues / PR for a release\n# Exec with \"python github_release_notes.py YOUR_GITHUB_API_ACCESS_TOKEN 2.1.0\"\n\n"
},
{
"path": "setup.cfg",
"chars": 40,
"preview": "[metadata]\ndescription-file = README.md\n"
},
{
"path": "setup.py",
"chars": 2654,
"preview": "from setuptools import setup, find_packages\n\n# To RELEASE:\n#\n# $ python3 -m build\n# $ twine upload dist/dtreeviz-1.4.0.t"
},
{
"path": "testing/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "testing/animate_rtree_bivar_3D.py",
"chars": 1878,
"preview": "# This is broken\n\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nimport matplotlib.pyplot as p"
},
{
"path": "testing/bin/icons.sh",
"chars": 256,
"preview": "# From current dir, convert all pdf to 40% size for icons\n\nfor f in $(basename -s '.pdf' *.pdf)\ndo\n\tif test $f.pdf -nt $"
},
{
"path": "testing/bin/topng.sh",
"chars": 194,
"preview": "# From current dir, convert all pdf to png\n\nfor f in $(basename -s '.pdf' *.pdf)\ndo\n\tif test $f.pdf -nt $f.png \n\tthen\n\t\t"
},
{
"path": "testing/cancer.py",
"chars": 1754,
"preview": "# -*- coding: utf-8 -*-\nimport numpy as np\nimport pandas as pd\nimport graphviz\nimport graphviz.backend\nfrom numpy.distut"
},
{
"path": "testing/data/cars.csv",
"chars": 5996,
"preview": "MPG,CYL,ENG,WGT\n18,8,307,3504\n15,8,350,3693\n18,8,318,3436\n16,8,304,3433\n17,8,302,3449\n15,8,429,4341\n14,8,454,4354\n14,8,4"
},
{
"path": "testing/data/forestfires.csv",
"chars": 25478,
"preview": "X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area\n7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0\n7,4,oct,tue,90.6,35.4"
},
{
"path": "testing/data/knowledge.csv",
"chars": 7818,
"preview": "STG,SCG,STR,LPR,PEG,UNS\n0,0,0,0,0,very_low\n0.08,0.08,0.1,0.24,0.9,High\n0.06,0.06,0.05,0.25,0.33,Low\n0.1,0.1,0.15,0.65,0."
},
{
"path": "testing/data/sweetrs.csv",
"chars": 154937,
"preview": "userID,productID,rating\n351,31,0\n57,9,3\n385,30,1\n286,23,4\n126,16,3\n371,46,0\n131,20,5\n116,17,3\n364,18,3\n180,40,2\n317,8,2\n"
},
{
"path": "testing/gen_feature_space_samples.py",
"chars": 7263,
"preview": "import numpy as np\nimport pandas as pd\nimport graphviz\nimport graphviz.backend\nfrom numpy.distutils.system_info import f"
},
{
"path": "testing/gen_samples.py",
"chars": 13759,
"preview": "# -*- coding: utf-8 -*-\nimport numpy as np\nimport pandas as pd\nimport graphviz\nimport graphviz.backend\nfrom numpy.distut"
},
{
"path": "testing/iris.py",
"chars": 1967,
"preview": "# -*- coding: utf-8 -*-\nimport numpy as np\nimport pandas as pd\nimport graphviz\nimport graphviz.backend\nfrom numpy.distut"
},
{
"path": "testing/issues_investigations.ipynb",
"chars": 47258,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 2,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": [\n "
},
{
"path": "testing/paper_examples.py",
"chars": 1861,
"preview": "import numpy as np\nfrom animl.trees import *\nfrom animl.viz.trees import *\n\nparrt_article = \"/Users/parrt/github/ml-arti"
},
{
"path": "testing/play_ctree.py",
"chars": 703,
"preview": "from dtreeviz.trees import *\n\nknow = pd.read_csv(\"data/knowledge.csv\")\nclass_names = ['very_low', 'Low', 'Middle', 'High"
},
{
"path": "testing/play_ctree_bivar.py",
"chars": 724,
"preview": "from dtreeviz.trees import *\n\nknow = pd.read_csv(\"data/knowledge.csv\")\nclass_names = ['very_low', 'Low', 'Middle', 'High"
},
{
"path": "testing/play_lightgbm.py",
"chars": 2247,
"preview": "import sys\nimport os\n\nimport numpy as np\nimport pandas as pd\nimport lightgbm as lgb\nfrom sklearn.model_selection import "
},
{
"path": "testing/play_rtree.py",
"chars": 550,
"preview": "from dtreeviz.trees import *\n\ndf_cars = pd.read_csv(\"data/cars.csv\")\nX = df_cars.drop('MPG', axis=1)\ny = df_cars['MPG']\n"
},
{
"path": "testing/play_rtree_bivar_3D.py",
"chars": 817,
"preview": "from mpl_toolkits.mplot3d import Axes3D # noqa: F401 unused import\nfrom dtreeviz.trees import *\n\ndf_cars = pd.read_csv("
},
{
"path": "testing/play_rtree_bivar_heatmap.py",
"chars": 565,
"preview": "from dtreeviz.trees import *\n\ndf_cars = pd.read_csv(\"data/cars.csv\")\nX = df_cars.drop('MPG', axis=1)\ny = df_cars['MPG']\n"
},
{
"path": "testing/play_spark.py",
"chars": 3068,
"preview": "import pandas as pd\nimport numpy as np\n\nimport pyspark\nfrom pyspark.sql import SparkSession\nfrom pyspark.ml.feature impo"
},
{
"path": "testing/playground.ipynb",
"chars": 5286566,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"metadata\": {},\n \"outputs\": [\n {\n \"data\":"
},
{
"path": "testing/slides.ipynb",
"chars": 173697,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 13,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": [\n"
},
{
"path": "testing/testlib/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "testing/testlib/models/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "testing/testlib/models/conftest.py",
"chars": 1129,
"preview": "import pandas as pd\nimport pytest\nimport os\n\nROOT_DIR = os.path.dirname(os.path.abspath(__file__))\n\nprint(f\"ROOT_DIR {RO"
},
{
"path": "testing/testlib/models/fixtures/dataset.csv",
"chars": 545,
"preview": "Pclass,Age,Fare,Sex_label,Cabin_label,Embarked_label,Survived\n3,22.0,7.25,1,-1,2,0\n1,38.0,71.2833,0,81,0,1\n3,26.0,7.925,"
},
{
"path": "testing/testlib/models/fixtures/dataset_lightgbm.csv",
"chars": 20372,
"preview": ",Pclass,Age,Fare,Sex_label,Cabin_label,Embarked_label,Survived\n331,1,45.5,28.5,1,56,2,0\n733,2,23.0,13.0,1,-1,2,0\n382,3,3"
},
{
"path": "testing/testlib/models/fixtures/dataset_spark_tf.csv",
"chars": 26473,
"preview": "Pclass,Sex_label,Embarked_label,Age_mean,SibSp,Parch,Fare,Survived\n3,0.0,0.0,22.0,1,0,7.25,0\n1,1.0,1.0,38.0,1,0,71.2833,"
},
{
"path": "testing/testlib/models/fixtures/lightgbm_model_classifier.txt",
"chars": 137676,
"preview": "tree\nversion=v3\nnum_class=1\nnum_tree_per_iteration=1\nlabel_index=0\nmax_feature_idx=5\nobjective=binary sigmoid:1\nfeature_"
},
{
"path": "testing/testlib/models/fixtures/spark_2_decision_tree_classifier.model/metadata/part-00000",
"chars": 625,
"preview": "{\"class\":\"org.apache.spark.ml.classification.DecisionTreeClassificationModel\",\"timestamp\":1601631161317,\"sparkVersion\":\""
},
{
"path": "testing/testlib/models/fixtures/spark_3_0_decision_tree_classifier.model/metadata/part-00000",
"chars": 656,
"preview": "{\"class\":\"org.apache.spark.ml.classification.DecisionTreeClassificationModel\",\"timestamp\":1596202077652,\"sparkVersion\":\""
},
{
"path": "testing/testlib/models/test_decision_tree_lightgbm_classifier.py",
"chars": 3879,
"preview": "import lightgbm as lgb\nimport pytest\nfrom dtreeviz.models.lightgbm_decision_tree import ShadowLightGBMTree\nimport numpy "
},
{
"path": "testing/testlib/models/test_decision_tree_spark_classifier.py",
"chars": 7608,
"preview": "import numpy as np\nimport pyspark\nimport pytest\nfrom pyspark.ml.classification import DecisionTreeClassificationModel\nfr"
},
{
"path": "testing/testlib/models/test_decision_tree_tensorflow_classifier.py",
"chars": 4051,
"preview": "import pytest\nimport numpy as np\n\nimport tensorflow_decision_forests as tf_df\nfrom dtreeviz.models.tensorflow_decision_t"
},
{
"path": "testing/testlib/models/test_decision_tree_xgb_regressor.py",
"chars": 2161,
"preview": "import joblib\nimport numpy as np\nimport pytest\nimport xgboost as xgb\n\nfrom dtreeviz.models.xgb_decision_tree import Shad"
},
{
"path": "testing/testlib/models/test_decision_trees_sk_classifier.py",
"chars": 7565,
"preview": "import joblib\nimport numpy as np\nimport pytest\nfrom sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier\n\nf"
},
{
"path": "testing/testlib/models/test_decision_trees_sk_pipeline.py",
"chars": 2467,
"preview": "import numpy as np\nimport pytest\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.feature_selection import V"
},
{
"path": "testing/testlib/models/test_decision_trees_xgb_classifier.py",
"chars": 3887,
"preview": "import joblib\nimport numpy as np\nimport pytest\nimport xgboost as xgb\n\nfrom dtreeviz.models.xgb_decision_tree import Shad"
},
{
"path": "testing/testone.py",
"chars": 7207,
"preview": "import graphviz\nfrom dtreeviz.shadow import *\nfrom gen_samples import *\nimport tempfile\nfrom sklearn.tree import export_"
},
{
"path": "testing/tf-catvars.py",
"chars": 2412,
"preview": "import tensorflow_decision_forests as tfdf\nimport dtreeviz\nprint(tfdf.__version__, dtreeviz.__version__ )\n\nimport tensor"
},
{
"path": "testing/tf_catvars2.py",
"chars": 1897,
"preview": "import tensorflow_decision_forests as tfdf\n\nimport numpy as np\nimport pandas as pd\n\nimport dtreeviz\n\nnp.random.seed(2)\n\n"
},
{
"path": "testing/tf_catvars3.py",
"chars": 1788,
"preview": "import tensorflow_decision_forests as tfdf\n\nimport numpy as np\nimport pandas as pd\n\nimport dtreeviz\n\nnp.random.seed(1)\n\n"
},
{
"path": "testing/tf_regr_catvars.py",
"chars": 1941,
"preview": "import tensorflow_decision_forests as tfdf\n\nimport tensorflow as tf\n\nimport os\nimport numpy as np\nimport pandas as pd\nim"
}
]
// ... and 9 more files (download for full content)
About this extraction
This page contains the full source code of the parrt/animl GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 89 files (194.9 MB), approximately 12.5M tokens, and a symbol index with 450 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.