Repository: kubeflow/example-seldon
Branch: master
Commit: d0e056aaa7ec
Files: 69
Total size: 98.7 KB

Directory structure:
gitextract_39vdm35i/

├── .gitignore
├── LICENSE
├── OWNERS
├── README.md
├── VERSION
├── k8s_serving/
│   ├── ab_test_sklearn_tensorflow.json
│   ├── ambassador-auth-service-config.yaml
│   ├── ambassador-auth-service-setup.yaml
│   ├── epsilon_greedy.json
│   ├── epsilon_greedy_3way.json
│   ├── serving_model.json
│   ├── serving_r_model.json
│   └── serving_sk_model.json
├── k8s_train/
│   ├── sklearn_training_job.yaml
│   └── tfJob.json
├── models/
│   ├── r_mnist/
│   │   ├── runtime/
│   │   │   ├── Dockerfile
│   │   │   ├── Makefile
│   │   │   ├── install.R
│   │   │   └── mnist.R
│   │   └── train/
│   │       ├── Dockerfile
│   │       ├── Makefile
│   │       ├── get_data.sh
│   │       ├── install.R
│   │       ├── train.R
│   │       └── train.sh
│   ├── sk_mnist/
│   │   ├── runtime/
│   │   │   ├── Dockerfile
│   │   │   ├── Makefile
│   │   │   ├── SkMnist.py
│   │   │   ├── contract.json
│   │   │   └── requirements.txt
│   │   └── train/
│   │       ├── Dockerfile
│   │       ├── Makefile
│   │       ├── create_model.py
│   │       ├── requirements.txt
│   │       └── train.sh
│   └── tf_mnist/
│       ├── runtime/
│       │   ├── DeepMnist.py
│       │   ├── Dockerfile
│       │   ├── Makefile
│       │   ├── contract.json
│       │   └── requirements.txt
│       └── train/
│           ├── Dockerfile
│           ├── Makefile
│           └── create_model.py
├── nfs.md
├── notebooks/
│   ├── Makefile
│   ├── __init__.py
│   ├── create-protos.sh
│   ├── proto/
│   │   ├── __init__.py
│   │   └── prediction.proto
│   ├── requirements.txt
│   ├── serving.ipynb
│   ├── training.ipynb
│   ├── utils.py
│   └── visualizer.py
├── scripts/
│   ├── README.md
│   ├── create_demo.sh
│   ├── delete-demo.sh
│   ├── env-example.sh
│   ├── nfs-pvc.yaml
│   ├── port-forwards.sh
│   └── watch-mnist.sh
└── workflows/
    ├── serving-r-mnist-workflow.yaml
    ├── serving-sk-mnist-workflow.yaml
    ├── serving-tf-mnist-workflow.md
    ├── serving-tf-mnist-workflow.yaml
    ├── training-r-mnist-workflow.yaml
    ├── training-sk-mnist-workflow.yaml
    ├── training-tf-mnist-workflow.md
    └── training-tf-mnist-workflow.yaml

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# build
/target/
/public
cluster-manager/.m2/

# Jupyter notebook checkpoint directories
.ipynb_checkpoints

# eclipse
.classpath
.settings/
.project

# Netbeans and IntelliJ files
# NOTE: the leading '!' negates a match, so .gitignore itself is never ignored.
!.gitignore
/nbproject
/*.ipr
/*.iws
*.iml
.idea

# Root-level bin directory, editor backup files (*~) and compiled Python
/bin/
*~
*.pyc
.m2
# The backslash escapes '#', making this a pattern for names that start
# with '#' rather than a comment line.
\#*
# Underscore-prefixed YAML/JSON files
# (presumably generated or scratch manifests; confirm)
_*.yaml
_*.json


# Build directories inside the model runtime folders
models/tf_mnist/runtime/build/
models/sk_mnist/runtime/build/

# Local data, generated protobuf modules and per-user script state
# (presumably produced by the training/notebook/scripts tooling; confirm)
models/sk_mnist/train/mnist-original.mat
notebooks/proto/prediction_pb2.py
notebooks/proto/prediction_pb2_grpc.py
notebooks/tensorflow
scripts/kubeflow_src
scripts/env.sh


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: OWNERS
================================================
# Repository ownership lists (appears to follow the Kubernetes-style OWNERS
# file convention; confirm against the project's review tooling).

# Accounts listed as approvers for this repository.
approvers:
  - cliveseldon
  - jinchihe
  - ryandawsonuk
# Accounts listed as reviewers for this repository.
reviewers:
  - cliveseldon
  - jinchihe


================================================
FILE: README.md
================================================
## :warning: **kubeflow/example-seldon is not maintained**

This repository has been deprecated and [archived](https://github.com/kubeflow/community/issues/479) on Nov 30th, 2021. 


# Train and Deploy Machine Learning Models on Kubernetes with Kubeflow and Seldon-Core

![MNIST](notebooks/mnist.png "MNIST Digits")

Using:

 * [kubeflow](https://github.com/kubeflow/kubeflow)
 * [seldon-core](https://github.com/SeldonIO/seldon-core)

The example will be the MNIST handwritten digit classification task. We will train 3 different models to solve this task:

 * A TensorFlow neural network model.
 * A scikit-learn random forest model.
 * An R least squares model.

We will then show various rolling deployments

 1. Deploy the single Tensorflow model.
 2. Do a rolling update to an AB test of the Tensorflow model and the sklearn model.
 3. Do a rolling update to a Multi-armed Bandit over all 3 models to direct traffic in real time to the best model.


In the following we will:

 1. [Install kubeflow and seldon-core on a kubernetes cluster](#setup)
 1. [Train the models](#train-the-models)
 1. [Serve the models](#serve-the-models)


# Requirements

 * gcloud
 * kubectl
 * ksonnet
 * argo


# Setup

  There is a consolidated script to create the demo which can be found [here](./scripts/README.md). For a step by step guide do the following:

  1. [Install kubeflow on GKE](https://www.kubeflow.org/docs/started/getting-started-gke/). This should create kubeflow in a namespace ```kubeflow```. We suggest you use the command line install so you can easily modify your Ksonnet installation. Ensure you have the environment variables `KUBEFLOW_SRC` and `KFAPP` set. OAuth is preferred because, with basic auth, [port-forwarding to ambassador is insufficient](https://github.com/kubeflow/kubeflow/issues/3213).

  1. Install seldon. Go to your Ksonnet application folder setup in the previous step and run
      ```
      cd ${KUBEFLOW_SRC}/${KFAPP}/ks_app

      ks pkg install kubeflow/seldon
      ks generate seldon seldon
      ks apply default -c seldon
      ```
  1. Install Helm
      ```
      kubectl -n kube-system create sa tiller
      kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller
      helm init --service-account tiller
      kubectl rollout status deploy/tiller-deploy -n kube-system
      ```
  1. Create an NFS disk and persistent volume claim called `nfs-1`. You can follow a guide on creating an NFS volume using Google Filestore [here](https://cloud.google.com/community/tutorials/gke-filestore-dynamic-provisioning). A consolidated set of steps is shown [here](nfs.md).
  1. Add Cluster Roles so Argo can start jobs successfully
      ```
      kubectl create clusterrolebinding my-cluster-admin-binding --clusterrole=cluster-admin --user=$(gcloud info --format="value(config.account)")
      kubectl create clusterrolebinding default-admin2 --clusterrole=cluster-admin --serviceaccount=kubeflow:default
      ```
  1. Install Seldon Analytics Dashboard
      ```
      helm install seldon-core-analytics --name seldon-core-analytics --set grafana_prom_admin_password=password --set persistence.enabled=false --repo https://storage.googleapis.com/seldon-charts --namespace kubeflow
      ```
  1. Port forward the dashboard when running
      ```
      kubectl port-forward $(kubectl get pods -n kubeflow -l app=grafana-prom-server -o jsonpath='{.items[0].metadata.name}') -n kubeflow 3000:3000
      ```
  1. Visit http://localhost:3000/dashboard/db/prediction-analytics?refresh=5s&orgId=1 and login using "admin" and the password you set above when launching with helm.

# MNIST models

## Tensorflow Model

 * [Python training code](models/tf_mnist/train/create_model.py)
 * [Python runtime prediction code](models/tf_mnist/runtime/DeepMnist.py)
 * [Dockerfile to wrap runtime prediction code to run under seldon-Core](models/tf_mnist/runtime/Dockerfile).

## SKLearn Model

 * [Python training code](models/sk_mnist/train/create_model.py)
 * [Python runtime prediction code](models/sk_mnist/runtime/SkMnist.py)
 * [Dockerfile to wrap runtime prediction code to run under seldon-Core](models/sk_mnist/runtime/Dockerfile).

## R Model

 * [R training code](models/r_mnist/train/train.R)
 * [R runtime prediction code](models/r_mnist/runtime/mnist.R)
 * [Dockerfile to wrap runtime prediction code to run under seldon-Core](models/r_mnist/runtime/Dockerfile).

# Train the Models

 Follow the steps in [./notebooks/training.ipynb](./notebooks/training.ipynb) to:

 * Run Argo Jobs for each model to:
   * Create training images and push to repo
   * Run training
   * Create runtime prediction images and push to repo
   * Deploy individual runtime model

**To push to your own repo the Docker images you will need to setup your docker credentials as a Kubernetes secret containing a [config.json](https://www.projectatomic.io/blog/2016/03/docker-credentials-store/). To do this you can find your docker home (typically ~/.docker) and run `kubectl create secret generic docker-config --from-file=config.json=${DOCKERHOME}/config.json --type=kubernetes.io/config` to [create a secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#registry-secret-existing-credentials).**

# Serve the Models

Follow the steps in [./notebooks/serving.ipynb](./notebooks/serving.ipynb) to:

 1. Deploy the single Tensorflow model.
 2. Do a rolling update to an AB test of the Tensorflow model and the sklearn model.
 3. Do a rolling update to a Multi-armed Bandit over all 3 models to direct traffic in real time to the best model.

To ensure the notebook can run successfully install the python dependencies:

```
pip install -r notebooks/requirements.txt
```

If you have [installed the Seldon-Core analytics](#setup) you can view them on the grafana dashboard:

![Grafana](grafana.png "Grafana Dashboard")


================================================
FILE: VERSION
================================================
0.1

================================================
FILE: k8s_serving/ab_test_sklearn_tensorflow.json
================================================
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
	"labels": {
	    "app": "seldon"
	},
	"name": "mnist-classifier"
    },
    "spec": {
	"annotations": {
	    "project_name": "kubeflow-seldon",
	    "deployment_version": "v1",
	    "seldon.io/rest-connection-timeout": "100"	    
	},
	"name": "mnist-classifier",
	"predictors": [
	    {
		"componentSpecs": [{
		    "spec": {
			"containers": [
			    {
                                "image": "seldonio/deepmnistclassifier_runtime:0.2",
				"name": "tf-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
			    },
			    {
                                "image": "seldonio/skmnistclassifier_runtime:0.2",
				"name": "sk-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
			    }
			],
                        "volumes": [
                            {
                                "name": "persistent-storage",
				"volumeSource" : {
                                    "persistentVolumeClaim": {
					"claimName": "nfs-1"
                                    }
				}
                            }
                        ]
		    }
		}],
		"name": "mnist-classifier",
		"replicas": 1,
		"annotations": {
		    "predictor_version": "v1"
		},
		"graph": {
		    "name": "random-ab-test",
		    "implementation":"RANDOM_ABTEST",
		    "parameters": [
			{
			    "name":"ratioA",
			    "value":"0.5",
			    "type":"FLOAT"
			}
		    ],
		    "children": [
			{
			    "name": "tf-model",
			    "endpoint":{
				"type":"REST"
			    },
			    "type":"MODEL"
			},
			{
			    "name": "sk-model",
			    "endpoint":{
				"type":"REST"
			    },
			    "type":"MODEL"
			}   
		    ]
		}
	    }
	]
    }
}
		
		
================================================
FILE: k8s_serving/ambassador-auth-service-config.yaml
================================================
---
# Service for the example-auth pods that also carries the Ambassador
# configuration registering those pods as the authentication module.
apiVersion: v1
kind: Service
metadata:
  name: example-auth
  annotations:
    # The annotation value is a literal block scalar holding an embedded
    # Ambassador YAML document. Everything indented under the key is part of
    # the string, so no comments or stray whitespace-only lines may be placed
    # inside it. The embedded document declares an "authentication" Module
    # whose auth_service is example-auth:3000 with path prefix /extauth.
    getambassador.io/config: |
      ---
      apiVersion: ambassador/v0
      kind: Module
      name: authentication
      config:
        auth_service: "example-auth:3000"
        path_prefix: "/extauth"
spec:
  type: ClusterIP
  # Route traffic to pods labelled app=example-auth.
  selector:
    app: example-auth
  ports:
  # Port 3000 matches the port named in auth_service above; traffic is
  # forwarded to the container port named http-api.
  - port: 3000
    name: http-example-auth
    targetPort: http-api


================================================
FILE: k8s_serving/ambassador-auth-service-setup.yaml
================================================
---
# ClusterIP Service exposing the example-auth pods inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: example-auth
spec:
  type: ClusterIP
  # Selects pods labelled app=example-auth (the label set on the pod template
  # of the example-auth Deployment defined later in this file).
  selector:
    app: example-auth
  ports:
  # Service port 3000 forwards to the container port named http-api.
  - port: 3000
    name: http-example-auth
    targetPort: http-api
---
# Deployment running one replica of the example auth service container that
# the example-auth Service in this file selects via the app=example-auth label.
#
# Uses apps/v1: the extensions/v1beta1 Deployment API is no longer served as
# of Kubernetes 1.16, so the manifest would be rejected by current clusters.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: example-auth
spec:
  replicas: 1
  # apps/v1 requires an explicit selector; it must match the labels set on the
  # pod template below.
  selector:
    matchLabels:
      app: example-auth
  strategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: example-auth
    spec:
      containers:
      - name: example-auth
        image: seldonio/ambassador-auth-service:1.1.1
        imagePullPolicy: IfNotPresent
        ports:
        # Named port referenced by the Service's targetPort.
        - name: http-api
          containerPort: 3000
        resources:
          limits:
            # Quoted so the CPU quantity is passed as the string "0.1".
            cpu: "0.1"
            memory: 100Mi


================================================
FILE: k8s_serving/epsilon_greedy.json
================================================
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
	"labels": {
	    "app": "seldon"
	},
	"name": "mnist-classifier"
    },
    "spec": {
	"annotations": {
	    "project_name": "kubeflow-seldon",
	    "deployment_version": "v1"
	},
	"name": "mnist-classifier",
	"predictors": [
	    {
		"componentSpecs": [{
		    "spec": {
			"containers": [
			    {
                                "image": "seldonio/deepmnistclassifier_runtime:0.2",
				"name": "tf-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
			    },
			    {
                                "image": "seldonio/skmnistclassifier_runtime:0.2",
				"name": "sk-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
			    },
			    {
				"image": "seldonio/mab_epsilon_greedy:1.1",
				"name": "eg-router"
			    }
			],
                        "volumes": [
                            {
                                "name": "persistent-storage",
				"volumeSource" : {
                                    "persistentVolumeClaim": {
					"claimName": "nfs-1"
                                    }
				}
                            }
                        ]
		    }
		}],
		"name": "mnist-classifier",
		"replicas": 1,
		"annotations": {
		    "predictor_version": "v1"
		},
		"graph": {
		    "name": "eg-router",
		    "type":"ROUTER",
		    "parameters": [
			{
			    "name": "n_branches",
			    "value": "2",
			    "type": "INT"
			},
			{
			    "name": "epsilon",
			    "value": "0.1",
			    "type": "FLOAT"
			},
			{
			    "name": "verbose",
			    "value": "1",
			    "type": "BOOL"
			}
		    ],
		    "children": [
			{
			    "name": "sk-model",
			    "type": "MODEL",
			    "endpoint":{
				"type":"REST"
			    }
			},
			{
			    "name": "tf-model",
			    "type": "MODEL",
			    "endpoint":{
				"type":"REST"
			    }
			}
		    ]
		}
	    }
	]
    }
}


================================================
FILE: k8s_serving/epsilon_greedy_3way.json
================================================
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
	"labels": {
	    "app": "seldon"
	},
	"name": "mnist-classifier"
    },
    "spec": {
	"annotations": {
	    "project_name": "kubeflow-seldon",
	    "deployment_version": "v1"
	},
	"name": "mnist-classifier",
	"predictors": [
	    {
		"componentSpecs": [{
		    "spec": {
			"containers": [
			    {
                                "image": "seldonio/deepmnistclassifier_runtime:0.2",
				"name": "tf-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
			    },
			    {
                                "image": "seldonio/skmnistclassifier_runtime:0.2",
				"name": "sk-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
			    },
			    {
                                "image": "seldonio/rmnistclassifier_runtime:0.2",
				"name": "r-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
			    },
			    {
				"image": "seldonio/mab_epsilon_greedy:1.1",
				"name": "eg-router"
			    }
			],
                        "volumes": [
                            {
                                "name": "persistent-storage",
				"volumeSource" : {
                                    "persistentVolumeClaim": {
					"claimName": "nfs-1"
                                    }
				}
                            }
                        ]
		    }
		}],
		"name": "mnist-classifier",
		"replicas": 1,
		"annotations": {
		    "predictor_version": "v1"
		},
		"graph": {
		    "name": "eg-router",
		    "type":"ROUTER",
		    "parameters": [
			{
			    "name": "n_branches",
			    "value": "3",
			    "type": "INT"
			},
			{
			    "name": "epsilon",
			    "value": "0.2",
			    "type": "FLOAT"
			},
			{
			    "name": "verbose",
			    "value": "1",
			    "type": "BOOL"
			}
		    ],
		    "children": [
			{
			    "name": "sk-model",
			    "type": "MODEL",
			    "endpoint":{
				"type":"REST"
			    }
			},
			{
			    "name": "tf-model",
			    "type": "MODEL",
			    "endpoint":{
				"type":"REST"
			    }
			},
			{
			    "name": "r-model",
			    "type": "MODEL",
			    "endpoint":{
				"type":"REST"
			    }
			}
		    ]
		}
	    }
	]
    }
}


================================================
FILE: k8s_serving/serving_model.json
================================================
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
        "labels": {
            "app": "seldon"
        },
        "name": "mnist-classifier"
    },
    "spec": {
        "annotations": {
            "deployment_version": "v1",
            "project_name": "MNIST Example",
	    "seldon.io/engine-separate-pod": "false",
	    "seldon.io/rest-connection-timeout": "100"
        },
        "name": "mnist-classifier",
        "predictors": [
            {
                "annotations": {
                    "predictor_version": "v1"
                },
                "componentSpecs": [{
                    "spec": {
                        "containers": [
                            {
                                "image": "seldonio/deepmnistclassifier_runtime:0.2",
                                "imagePullPolicy": "Always",
                                "name": "tf-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
                            }
                        ],
                        "terminationGracePeriodSeconds": 1,
                        "volumes": [
                            {
                                "name": "persistent-storage",
				"volumeSource" : {
                                    "persistentVolumeClaim": {
					"claimName": "nfs-1"
                                    }
				}
                            }
                        ]
                     }
                }],
                "graph": {
                    "children": [],
                    "endpoint": {
                        "type": "REST"
                    },
                    "name": "tf-model",
                    "type": "MODEL"
                },
                "name": "mnist-classifier",
                "replicas": 1
            }
        ]
    }
}


================================================
FILE: k8s_serving/serving_r_model.json
================================================
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
        "labels": {
            "app": "seldon"
        },
        "name": "mnist-classifier"
    },
    "spec": {
        "annotations": {
            "deployment_version": "v1",
            "project_name": "MNIST Example"
        },
        "name": "mnist-classifier",
        "predictors": [
            {
                "annotations": {
                    "predictor_version": "v1"
                },
                "componentSpecs": [{
                    "spec": {
                        "containers": [
                            {
                                "image": "seldonio/rmnistclassifier_runtime:0.2",
                                "imagePullPolicy": "Always",
                                "name": "r-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
                            }
                        ],
                        "terminationGracePeriodSeconds": 1,
                        "volumes": [
                            {
                                "name": "persistent-storage",
				"volumeSource" : {
                                    "persistentVolumeClaim": {
					"claimName": "nfs-1"
                                    }
				}
                            }
                        ]
                     }
                }],
                "graph": {
                    "children": [],
                    "endpoint": {
                        "type": "REST"
                    },
                    "name": "r-model",
                    "type": "MODEL"
                },
                "name": "mnist-classifier",
                "replicas": 1
            }
        ]
    }
}


================================================
FILE: k8s_serving/serving_sk_model.json
================================================
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
        "labels": {
            "app": "seldon"
        },
        "name": "mnist-classifier"
    },
    "spec": {
        "annotations": {
            "deployment_version": "v1",
            "project_name": "MNIST Example"
        },
        "name": "mnist-classifier",
        "predictors": [
            {
                "annotations": {
                    "predictor_version": "v1"
                },
                "componentSpecs": [{
                    "spec": {
                        "containers": [
                            {
                                "image": "seldonio/skmnistclassifier_runtime:0.2",
                                "imagePullPolicy": "Always",
                                "name": "sk-model",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
                            }
                        ],
                        "terminationGracePeriodSeconds": 1,
                        "volumes": [
                            {
                                "name": "persistent-storage",
				"volumeSource" : {
                                    "persistentVolumeClaim": {
					"claimName": "nfs-1"
                                    }
				}
                            }
                        ]
                     }
                }],
                "graph": {
                    "children": [],
                    "endpoint": {
                        "type": "REST"
                    },
                    "name": "sk-model",
                    "type": "MODEL"
                },
                "name": "mnist-classifier",
                "replicas": 1
            }
        ]
    }
}


================================================
FILE: k8s_train/sklearn_training_job.yaml
================================================
---
# Kubernetes batch Job that runs the scikit-learn MNIST trainer image once,
# with a persistent volume claim mounted at /data.
# NOTE(review): models/sk_mnist/train/Makefile builds tag 0.2 of this image;
# confirm whether the tag below should track it.
apiVersion: batch/v1
kind: Job
metadata:
  name: sk-train
  namespace: default
spec:
  template:
    metadata:
      name: sk-train
    spec:
      restartPolicy: Never
      containers:
        - name: sk-train
          image: seldonio/skmnistclassifier_trainer:0.1
          volumeMounts:
            - name: persistent-storage
              mountPath: /data
      volumes:
        - name: persistent-storage
          persistentVolumeClaim:
            claimName: ml-data


================================================
FILE: k8s_train/tfJob.json
================================================
{
    "apiVersion": "kubeflow.org/v1alpha1",
    "kind": "TFJob",
    "metadata": {
        "name": "mnist-train",
        "namespace": "kubeflow-seldon"
    },
    "spec": {
        "replicaSpecs": [
            {
                "replicas": 1,
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "image": "seldonio/deepmnistclassifier_trainer:0.1",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
                            }
                        ],
                        "restartPolicy": "OnFailure",
                        "volumes": [
                            {
                                "name": "persistent-storage",
                                "persistentVolumeClaim": {
                                    "claimName": "ml-data"
                                }
                            }
                        ]
                    }
                },
                "tfReplicaType": "MASTER"
            }
        ]
    }
}


================================================
FILE: models/r_mnist/runtime/Dockerfile
================================================
FROM rocker/r-apt:bionic

# R packages installed from apt, plus curl to download the wrapper script below.
RUN apt-get update && \
    apt-get install -y -qq \
        r-cran-plumber \
        r-cran-jsonlite \
        r-cran-optparse \
        r-cran-stringr \
        r-cran-urltools \
        r-cran-caret \
        r-cran-pls \
        curl

# Arguments passed to microservice.R by the CMD at the bottom.
ENV MODEL_NAME mnist.R
ENV API_TYPE REST
ENV SERVICE_TYPE MODEL
ENV PERSISTENCE 0

RUN mkdir microservice
COPY . /microservice
WORKDIR /microservice

# Fetch the Seldon R wrapper pinned to v0.2.7.
# -f makes the build fail on an HTTP error instead of saving the error page as
# microservice.R; -o names the destination explicitly (previously "-O" and a
# stdout redirect both targeted the same file).
RUN curl -fsSL -o /microservice/microservice.R https://raw.githubusercontent.com/SeldonIO/seldon-core/v0.2.7/wrappers/s2i/R/microservice.R

EXPOSE 5000

CMD Rscript microservice.R --model $MODEL_NAME --api $API_TYPE --service $SERVICE_TYPE --persistence $PERSISTENCE

================================================
FILE: models/r_mnist/runtime/Makefile
================================================

# Both targets run docker commands and produce no file of the same name, so
# declare them phony to keep a stray file from masking them.
.PHONY: seldon_build_image_local seldon_push_docker_hub

# Build the R MNIST runtime image from this directory.
seldon_build_image_local:
	docker build . -t seldonio/rmnistclassifier_runtime:0.2

# Push the runtime image to the registry.
seldon_push_docker_hub:
	docker push seldonio/rmnistclassifier_runtime:0.2


================================================
FILE: models/r_mnist/runtime/install.R
================================================
# Install the 'pls' package.
install.packages("pls")


================================================
FILE: models/r_mnist/runtime/mnist.R
================================================
library(methods)

# Predict class probabilities for `newdata` with the model held in `mnist`.
# The columns of `newdata` are renamed to X1..X784 before predicting.
predict.mnist <- function(mnist,newdata=list()) {
  colnames(newdata) <- paste("X", 1:784, sep = "")
  predict(mnist$model, newdata = newdata, type='prob')
}

# Feedback hook: accepts the feedback arguments and does nothing with them.
send_feedback.mnist <- function(mnist,request=list(),reward=1,truth=list()) {
  NULL
}

# Build an object of class "mnist" wrapping the model deserialised from
# the RDS file `filename`.
new_mnist <- function(filename) {
  wrapper <- list(model = readRDS(filename))
  class(wrapper) <- "mnist"
  wrapper
}

# Entry point that returns the model object; `params` is accepted but unused.
# The model is always read from /data/model.Rds.
initialise_seldon <- function(params) {
  model_path <- "/data/model.Rds"
  new_mnist(model_path)
}

================================================
FILE: models/r_mnist/train/Dockerfile
================================================
FROM rocker/r-apt:bionic

# caret, pls and e1071 come from apt.
RUN apt-get update && \
    apt-get install -y -qq \
        r-cran-caret \
        r-cran-pls \
        r-cran-e1071

# doParallel is installed from within R.
RUN R -e 'install.packages("doParallel")'

RUN mkdir training
COPY train.R get_data.sh train.sh /training/

# Download and unpack the MNIST files into /training at build time.
WORKDIR /training
RUN ./get_data.sh

CMD ["/training/train.sh"]

================================================
FILE: models/r_mnist/train/Makefile
================================================


# Neither target produces a file of its own name; mark them phony.
.PHONY: build_model push_image

# Build the R MNIST trainer image from this directory.
build_model:
	docker build --force-rm=true -t seldonio/rmnistclassifier_trainer:0.1 .

# Push the trainer image to the registry.
push_image:
	docker push seldonio/rmnistclassifier_trainer:0.1


================================================
FILE: models/r_mnist/train/get_data.sh
================================================
#!/usr/bin/env bash
# Download the four MNIST IDX archives into the current directory and unpack them.

# Abort on the first failed download or decompression, so that the caller
# (the image build) fails instead of continuing with missing data files.
set -e

for name in \
    train-images-idx3-ubyte \
    train-labels-idx1-ubyte \
    t10k-images-idx3-ubyte \
    t10k-labels-idx1-ubyte
do
    wget "http://yann.lecun.com/exdb/mnist/${name}.gz"
    gunzip "${name}.gz"
done


================================================
FILE: models/r_mnist/train/install.R
================================================
# Install the packages one at a time, in this order.
for (pkg in c("caret", "doParallel", "pls", "e1071")) {
  install.packages(pkg)
}


================================================
FILE: models/r_mnist/train/train.R
================================================
library(caret)
library(doParallel)

# Register a parallel backend with one worker per detected core.
n_cores <- detectCores()
cl <- makeCluster(n_cores)
registerDoParallel(cl)

# Load the MNIST digit recognition dataset into R
# http://yann.lecun.com/exdb/mnist/
# assume you have all 4 files and gunzip'd them
# creates train$n, train$x, train$y  and test$n, test$x, test$y
# e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28)
# call:  show_digit(train$x[5,])   to see a digit.
# brendan o'connor - gist.github.com/39760 - anyall.org
load_mnist <- function() {
  # Reads the four unpacked MNIST files from the working directory and stores
  # the results in the variables `train` and `test` of the enclosing
  # environment (via `<<-`).

  # Read one big-endian 4-byte integer header field from an open connection.
  read_header_int <- function(con) {
    readBin(con, 'integer', n = 1, size = 4, endian = 'big')
  }

  # Returns list(n = image count, x = matrix with one image per row).
  load_image_file <- function(filename) {
    con <- file(filename, 'rb')
    read_header_int(con)  # first header field is read and discarded
    images <- list()
    images$n <- read_header_int(con)
    n_rows <- read_header_int(con)
    n_cols <- read_header_int(con)
    pixels <- readBin(con, 'integer', n = images$n * n_rows * n_cols,
                      size = 1, signed = FALSE)
    images$x <- matrix(pixels, ncol = n_rows * n_cols, byrow = TRUE)
    close(con)
    images
  }

  # Returns the labels as an integer vector.
  load_label_file <- function(filename) {
    con <- file(filename, 'rb')
    read_header_int(con)  # first header field is read and discarded
    n_labels <- read_header_int(con)
    labels <- readBin(con, 'integer', n = n_labels, size = 1, signed = FALSE)
    close(con)
    labels
  }

  train <<- load_image_file('train-images-idx3-ubyte')
  test <<- load_image_file('t10k-images-idx3-ubyte')

  train$y <<- load_label_file('train-labels-idx1-ubyte')
  test$y <<- load_label_file('t10k-labels-idx1-ubyte')
}

# Plot a 784-value vector as a 28-row image; extra arguments go to image().
show_digit <- function(arr784, col=gray(12:1/12), ...) {
  pixels <- matrix(arr784, nrow=28)
  # Reverse the column order before plotting.
  image(pixels[, 28:1], col=col, ...)
}

# Placeholders; load_mnist() replaces both through `<<-`.
train <- data.frame()
test <- data.frame()

# Load data.
load_mnist()

# Normalize: X = (X - min) / (max - min) with min = 0 and max = 255, i.e. X / 255.
train$x <- train$x / 255

# One row per image: label `y` (as a factor) followed by the pixel columns,
# split 60/40 into training and cross-validation sets.
labelled <- data.frame(y = train$y, train$x)
labelled$y <- as.factor(labelled$y)
train_rows <- createDataPartition(labelled$y, p = 0.60, list = FALSE)
training <- labelled[train_rows, ]
cv <- labelled[-train_rows, ]

# Fit on the first 1000 training rows with caret's 'pls' method.
# `train(...)` still resolves to the function: the variable `train` above is
# not a function, so R's call lookup skips it.
# Alternative kept for reference (SVM. 95/94.):
#fit <- train(y ~ ., data = head(training, 1000), method = 'svmRadial', tuneGrid = data.frame(sigma=0.0107249, C=1))
fit <- train(y ~ ., data = head(training, 1000), method = 'pls')
results <- predict(fit, newdata = head(cv, 1000), type = 'prob')
#confusionMatrix(results, head(cv$y, 1000))

# Persist the fitted model.
saveRDS(fit, file = "/data/model.Rds", compress = TRUE)


================================================
FILE: models/r_mnist/train/train.sh
================================================
#!/usr/bin/env bash
# Wait for /data to be mounted, then run the R training script.

# Stop at the first failing command.
set -e

# Poll once per second until /data is a mount point.
while ! mountpoint -q /data; do
    echo "$(date) - waiting for /data to be mounted..."
    sleep 1
done

# List /data before and after training.
ls -l /data

Rscript train.R

ls -l /data


================================================
FILE: models/sk_mnist/runtime/Dockerfile
================================================
FROM python:3.7-slim

# Copy the model code and install its Python requirements.
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt

# Arguments consumed by the CMD below.
ENV MODEL_NAME=SkMnist \
    API_TYPE=REST \
    SERVICE_TYPE=MODEL \
    PERSISTENCE=0

EXPOSE 5000

CMD exec seldon-core-microservice $MODEL_NAME $API_TYPE --service-type $SERVICE_TYPE --persistence $PERSISTENCE


================================================
FILE: models/sk_mnist/runtime/Makefile
================================================

# Both targets run docker commands and produce no file of the same name, so
# declare them phony to keep a stray file from masking them.
.PHONY: seldon_build_image_local seldon_push_docker_hub

# Build the scikit-learn MNIST runtime image from this directory.
seldon_build_image_local:
	docker build . -t seldonio/skmnistclassifier_runtime:0.2

# Push the runtime image to the registry.
seldon_push_docker_hub:
	docker push seldonio/skmnistclassifier_runtime:0.2


================================================
FILE: models/sk_mnist/runtime/SkMnist.py
================================================
class SkMnist(object):
    """Model class that serves a pickled scikit-learn MNIST classifier."""

    def __init__(self):
        """Load the classifier from ``/data/sk.pkl``.

        joblib is imported here rather than at module level so the import can
        fall back gracefully: newer scikit-learn releases no longer ship
        ``sklearn.externals.joblib``, so the standalone ``joblib`` package is
        tried first and the vendored copy is used only when it is missing.
        """
        try:
            import joblib
        except ImportError:
            from sklearn.externals import joblib

        self.class_names = ["class:{}".format(str(i)) for i in range(10)]
        self.clf = joblib.load('/data/sk.pkl')

    def predict(self,X,feature_names):
        """Return ``self.clf.predict_proba(X)``; ``feature_names`` is unused."""
        predictions = self.clf.predict_proba(X)
        return predictions

    
================================================
FILE: models/sk_mnist/runtime/contract.json
================================================
{
    "features":[
	{
	    "name":"x",
	    "dtype":"FLOAT",
	    "ftype":"continuous",
	    "range":[0,1],
	    "repeat":784
	}
    ],
    "targets":[
	{
	    "name":"class",
	    "dtype":"FLOAT",
	    "ftype":"continuous",
	    "range":[0,1],
	    "repeat":10
	}
    ]
}

    
================================================
FILE: models/sk_mnist/runtime/requirements.txt
================================================
scipy>= 0.13.3
scikit-learn>=0.18
seldon-core>=0.2.5

================================================
FILE: models/sk_mnist/train/Dockerfile
================================================
FROM python:3.7-slim

# Only a compiler toolchain is needed from apt: the base image already ships
# its own Python and pip, and the distro "python-pip"/"python-dev" packages
# target the distribution's Python 2 (and are not available on newer Debian
# bases). update and install share one layer so a cached, stale package index
# is never reused.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends build-essential && \
    rm -rf /var/lib/apt/lists/*

COPY /requirements.txt /tmp/
RUN cd /tmp && \
    pip install --no-cache-dir -r requirements.txt

RUN mkdir training
COPY ./create_model.py /training/create_model.py
COPY ./train.sh /training/train.sh
WORKDIR /training

CMD ["/training/train.sh"]


================================================
FILE: models/sk_mnist/train/Makefile
================================================


# Neither target produces a file of its own name; mark them phony.
.PHONY: build_model push_image

# Build the scikit-learn MNIST trainer image from this directory.
build_model:
	docker build --force-rm=true -t seldonio/skmnistclassifier_trainer:0.2 .

# Push the trainer image to the registry.
push_image:
	docker push seldonio/skmnistclassifier_trainer:0.2


================================================
FILE: models/sk_mnist/train/create_model.py
================================================
"""Train a random-forest MNIST classifier and save it to /data/sk.pkl."""
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets, metrics
from sklearn.utils import shuffle
from six.moves import urllib

# Newer scikit-learn releases no longer ship sklearn.externals.joblib; prefer
# the standalone package and fall back to the vendored copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib


def _load_mnist():
    """Return a mapping with the MNIST 'data' and 'target' arrays.

    Tries ``fetch_mldata`` first. The import lives inside the ``try`` because
    newer scikit-learn releases no longer provide it; an import failure must
    lead to the fallback download rather than abort the script.
    """
    try:
        from sklearn.datasets import fetch_mldata
        return fetch_mldata('MNIST original')
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Could not download MNIST data from mldata.org, trying alternative...")

    # Alternative method to load MNIST, if mldata.org is down
    from scipy.io import loadmat
    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    mnist_path = "./mnist-original.mat"
    response = urllib.request.urlopen(mnist_alternative_url)
    with open(mnist_path, "wb") as f:
        content = response.read()
        f.write(content)
    mnist_raw = loadmat(mnist_path)
    mnist = {
        "data": mnist_raw["data"].T,
        "target": mnist_raw["label"][0],
        "COL_NAMES": ["label", "data"],
        "DESCR": "mldata.org dataset: mnist-original",
    }
    print("Success!")
    return mnist


if __name__ == '__main__':
    mnist = _load_mnist()

    # To apply a classifier on this data, we need to flatten the image, to
    # turn the data in a (samples, feature) matrix:
    n_samples = len(mnist['data'])
    data = mnist['data'].reshape((n_samples, -1))
    targets = mnist['target']

    data,targets = shuffle(data,targets)
    classifier = RandomForestClassifier(n_estimators=30)

    # We learn the digits on the first half of the digits
    classifier.fit(data[:n_samples // 2], targets[:n_samples // 2])

    # Now predict the value of the digit on the second half:
    expected = targets[n_samples // 2:]
    test_data = data[n_samples // 2:]

    print(classifier.score(test_data, expected))

    predicted = classifier.predict(data[n_samples // 2:])

    print("Classification report for classifier %s:\n%s\n"
          % (classifier, metrics.classification_report(expected, predicted)))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

    # Persist the fitted classifier.
    joblib.dump(classifier, '/data/sk.pkl')


================================================
FILE: models/sk_mnist/train/requirements.txt
================================================
scipy
scikit-learn>=0.18
six


================================================
FILE: models/sk_mnist/train/train.sh
================================================
#!/usr/bin/env bash
# Wait for /data to be mounted, then run the scikit-learn training script.

# exit when any command fails
set -e

# Poll once per second until /data is a mount point.
until mountpoint -q /data; do
    echo "$(date) - waiting for /data to be mounted..."
    sleep 1
done

# List /data before and after training.
ls -l /data

python -u create_model.py

ls -l /data


================================================
FILE: models/tf_mnist/runtime/DeepMnist.py
================================================
import tensorflow as tf
import logging
logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s {%(module)s} [%(funcName)s] %(message)s', datefmt='%Y-%m-%d,%H:%M:%S', level=logging.INFO)
logger = logging.getLogger(__name__)

class DeepMnist(object):
    """Model class that serves a TensorFlow MNIST graph restored from /data."""

    def __init__(self):
        """Open a session, restore the saved graph and look up its tensors."""
        self.class_names = ["class:" + str(digit) for digit in range(10)]

        self.sess = tf.Session()
        checkpoint_dir = "/data/"
        meta_graph_path = "/data/deep_mnist_model.meta"
        restorer = tf.train.import_meta_graph(meta_graph_path)
        restorer.restore(self.sess, tf.train.latest_checkpoint(checkpoint_dir))

        # Tensors are fetched by name from the default graph.
        default_graph = tf.get_default_graph()
        self.x = default_graph.get_tensor_by_name("x:0")
        self.y = default_graph.get_tensor_by_name("y:0")

    def predict(self,X,feature_names):
        """Run tensor ``y`` with ``X`` fed into ``x``; ``feature_names`` is unused."""
        return self.sess.run(self.y, feed_dict={self.x: X})

    
================================================
FILE: models/tf_mnist/runtime/Dockerfile
================================================
FROM python:3.7-slim

# Copy the model code and install its Python requirements.
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt

# Arguments consumed by the CMD below.
ENV MODEL_NAME=DeepMnist \
    API_TYPE=REST \
    SERVICE_TYPE=MODEL \
    PERSISTENCE=0

EXPOSE 5000

CMD exec seldon-core-microservice $MODEL_NAME $API_TYPE --service-type $SERVICE_TYPE --persistence $PERSISTENCE

================================================
FILE: models/tf_mnist/runtime/Makefile
================================================

# Both targets run docker commands and produce no file of the same name, so
# declare them phony to keep a stray file from masking them.
.PHONY: seldon_build_image_local seldon_push_docker_hub

# Build the TensorFlow MNIST runtime image from this directory.
seldon_build_image_local:
	docker build . -t seldonio/deepmnistclassifier_runtime:0.2

# Push the runtime image to the registry.
seldon_push_docker_hub:
	docker push seldonio/deepmnistclassifier_runtime:0.2

================================================
FILE: models/tf_mnist/runtime/contract.json
================================================
{
    "features":[
	{
	    "name":"x",
	    "dtype":"FLOAT",
	    "ftype":"continuous",
	    "range":[0,1],
	    "repeat":784
	}
    ],
    "targets":[
	{
	    "name":"class",
	    "dtype":"FLOAT",
	    "ftype":"continuous",
	    "range":[0,1],
	    "repeat":10
	}
    ]
}

    
================================================
FILE: models/tf_mnist/runtime/requirements.txt
================================================
tensorflow==1.13.1
seldon-core>=0.2.5

================================================
FILE: models/tf_mnist/train/Dockerfile
================================================
FROM tensorflow/tensorflow:1.3.0

RUN mkdir training

# Run the training script from /training.
WORKDIR /training
COPY create_model.py /training/create_model.py

CMD ["python", "-u", "create_model.py"]


================================================
FILE: models/tf_mnist/train/Makefile
================================================


# Neither target produces a file of its own name; mark them phony.
.PHONY: build_model push_image

# Build the TensorFlow MNIST trainer image from this directory.
build_model:
	docker build --force-rm=true -t seldonio/deepmnistclassifier_trainer:0.1 .

# Push the trainer image to the registry.
push_image:
	docker push seldonio/deepmnistclassifier_trainer:0.1


================================================
FILE: models/tf_mnist/train/create_model.py
================================================
"""Train a single-layer softmax MNIST classifier and save it under /data."""
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

# Loaded at import time, as before.
mnist = input_data.read_data_sets("MNIST_data/", one_hot = True)

if __name__ == '__main__':

    # Input placeholder; the name "x" is part of the saved graph.
    x = tf.placeholder(tf.float32, [None,784], name="x")

    W = tf.Variable(tf.zeros([784,10]))
    b = tf.Variable(tf.zeros([10]))

    # Softmax output; the name "y" is part of the saved graph.
    y = tf.nn.softmax(tf.matmul(x,W) + b, name="y")

    # One-hot ground-truth labels.
    y_ = tf.placeholder(tf.float32, [None, 10])

    # `axis` replaces the deprecated `reduction_indices` spelling.
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=1))

    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    # Replaces the deprecated tf.initialize_all_variables().
    init = tf.global_variables_initializer()

    # The context manager closes the session once the model has been saved.
    with tf.Session() as sess:
        sess.run(init)

        for i in range(1000):
            batch_xs, batch_ys = mnist.train.next_batch(100)
            sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

        # Report accuracy on the test split.
        correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        print(sess.run(accuracy, feed_dict = {x: mnist.test.images, y_:mnist.test.labels}))

        saver = tf.train.Saver()

        saver.save(sess, "/data/deep_mnist_model")


================================================
FILE: nfs.md
================================================
# Example NFS Setup

The steps below consolidate the guide [here](https://cloud.google.com/community/tutorials/gke-filestore-dynamic-provisioning).

Set the following variables

  * `FS` : the name of your filestore
  * `PROJECT` : Your Google Project
  * `ZONE` : Your GCP Zone

Create a Google Filestore and install the helm chart for nfs-client-provisioner to use it.
```
  PROJECT=seldon-demos
  FS=mnist-data
  ZONE=europe-west1-b    

  gcloud beta filestore instances create ${FS}     --project=${PROJECT}     --location=${ZONE}     --tier=STANDARD     --file-share=name="volumes",capacity=1TB     --network=name="default",reserved-ip-range="10.0.0.0/29"

  FSADDR=$(gcloud beta filestore instances describe ${FS} --project=${PROJECT} --location=${ZONE} --format="value(networks.ipAddresses[0])")

  helm install stable/nfs-client-provisioner --name nfs-cp --set nfs.server=${FSADDR} --set nfs.path=/volumes
  kubectl rollout status  deploy/nfs-cp-nfs-client-provisioner -n kubeflow
```

To create the NFS claim, save the following manifest and apply it to your Kubernetes cluster:

```
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-1
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: nfs-client
  resources:
    requests:
      storage: 30Gi
```


================================================
FILE: notebooks/Makefile
================================================
SHELL := /bin/bash

.PHONY: create_protos clean

# File target: create-protos.sh is run only when tensor.proto is missing.
tensorflow/core/framework/tensor.proto:
	./create-protos.sh

# Convenience alias for the file target above.
create_protos: tensorflow/core/framework/tensor.proto

# Remove the downloaded proto tree.
clean:
	@rm -rfv tensorflow


================================================
FILE: notebooks/__init__.py
================================================


================================================
FILE: notebooks/create-protos.sh
================================================
#!/bin/bash
# Download the TensorFlow framework .proto files needed by prediction.proto.
# Usage: create-protos.sh [release]   (release defaults to "master")

# Stop on the first failure, on unset variables and on failures inside pipes.
set -euo pipefail

release=${1:-"master"}

echo "Downloading proto files for ${release}"

base=https://raw.githubusercontent.com/tensorflow
tensorflow_base=${base}/tensorflow/${release}

base_folder=tensorflow/core/framework
mkdir -p "${base_folder}"

# tensor.proto stays last, as in the original download order.
for proto in types resource_handle tensor_shape tensor; do
    target="${base_folder}/${proto}.proto"
    # -f: fail on HTTP errors instead of saving the error page as a .proto.
    # Download to a temporary name and rename on success, so a failed download
    # never leaves a partial or bogus file at the final path.
    curl -fsS -o "${target}.tmp" "${tensorflow_base}/tensorflow/core/framework/${proto}.proto"
    mv "${target}.tmp" "${target}"
done


================================================
FILE: notebooks/proto/__init__.py
================================================


================================================
FILE: notebooks/proto/prediction.proto
================================================
syntax = "proto3";

import "google/protobuf/struct.proto";
import "tensorflow/core/framework/tensor.proto";

package seldon.protos;

option java_package = "io.seldon.protos";
option java_outer_classname = "PredictionProtos";
option go_package = "github.com/seldonio/seldon-core/examples/wrappers/go/pkg/api";

// [START Messages]

// Top-level message: a status, metadata, and exactly one payload form
// (structured data, raw bytes, or a string).
message SeldonMessage {

  Status status = 1;
  Meta meta = 2;
  // At most one of the following payload fields is set.
  oneof data_oneof {
    DefaultData data = 3;
    bytes binData = 4;
    string strData = 5;
  }
}

// Structured payload: a list of names plus the values in one of three
// encodings.
message DefaultData {
  repeated string names = 1;
  // At most one of the following encodings is set.
  oneof data_oneof {
    Tensor tensor = 2;
    google.protobuf.ListValue ndarray = 3;
    tensorflow.TensorProto tftensor = 4;
  }
}

// Numeric tensor given as a list of dimension sizes and a flat list of
// double values. NOTE(review): element ordering is not specified here.
message Tensor {
  repeated int32 shape = 1 [packed=true];
  repeated double values = 2 [packed=true];
}

// Metadata attached to a SeldonMessage: an id string, free-form tags,
// routing and request-path maps, and a list of metrics.
message Meta {
  string puid = 1; 
  map<string,google.protobuf.Value> tags = 2;
  map<string,int32> routing = 3;
  map<string,string> requestPath = 4;
  repeated Metric metrics = 5;
}

// A single metric: key, type, float value and string tags.
message Metric {
  // Kind of metric being reported.
  enum MetricType {
    COUNTER = 0;
    GAUGE = 1;
    TIMER = 2;
  }
  string key = 1;
  MetricType type = 2;
  float value = 3;
  map<string,string> tags = 4;
}

// A list of SeldonMessage values; the input type of the Aggregate RPCs.
message SeldonMessageList {
  repeated SeldonMessage seldonMessages = 1;
}

// Outcome of a call: numeric code, info and reason strings, and a
// success/failure flag.
message Status {

  // Overall result flag.
  enum StatusFlag {
    SUCCESS = 0;
    FAILURE = 1;
  }

  int32 code = 1;
  string info = 2;
  string reason = 3;
  StatusFlag status = 4;
}

// Feedback on an earlier exchange: the request, the response, a float
// reward, and a truth message. Input type of the SendFeedback RPCs.
message Feedback {
  SeldonMessage request = 1;
  SeldonMessage response = 2;
  float reward = 3;
  SeldonMessage truth = 4;
}

// A request message paired with its response message.
message RequestResponse {
  SeldonMessage request = 1;
  SeldonMessage response = 2;
}

// [END Messages]


// [START Services]

// Service exposing the transform, route, aggregate and feedback RPCs together.
service Generic {
  rpc TransformInput(SeldonMessage) returns (SeldonMessage) {};
  rpc TransformOutput(SeldonMessage) returns (SeldonMessage) {};
  rpc Route(SeldonMessage) returns (SeldonMessage) {};
  rpc Aggregate(SeldonMessageList) returns (SeldonMessage) {};
  rpc SendFeedback(Feedback) returns (SeldonMessage) {};
}

// Model component: prediction plus feedback.
service Model {
  rpc Predict(SeldonMessage) returns (SeldonMessage) {};
  rpc SendFeedback(Feedback) returns (SeldonMessage) {};
}

// Router component: routing plus feedback.
service Router {
  rpc Route(SeldonMessage) returns (SeldonMessage) {};
  rpc SendFeedback(Feedback) returns (SeldonMessage) {};
}

// Input transformer component.
service Transformer {
  rpc TransformInput(SeldonMessage) returns (SeldonMessage) {};
}

// Output transformer component.
service OutputTransformer {
  rpc TransformOutput(SeldonMessage) returns (SeldonMessage) {};
}

// Combiner component: aggregates a list of messages into one.
service Combiner {
  rpc Aggregate(SeldonMessageList) returns (SeldonMessage) {};
}


// Service with the same Predict and SendFeedback RPCs as Model.
service Seldon {
  rpc Predict(SeldonMessage) returns (SeldonMessage) {};
  rpc SendFeedback(Feedback) returns (SeldonMessage) {};
}

// [END Services]

================================================
FILE: notebooks/requirements.txt
================================================
matplotlib==3.0.3
grpcio==1.20.1
grpcio-tools==1.20.1
graphviz==0.10.1


================================================
FILE: notebooks/serving.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deploying Various MNIST Models on Kubernetes \n",
    "\n",
    "Using:\n",
    "\n",
    " * kubeflow\n",
    " * seldon-core\n",
    " \n",
    " \n",
    "Follow the main README to setup kubeflow and seldon-core. This notebook will show various rolling deployments of the trained models\n",
    "\n",
    " * Single model\n",
    " * AB Test between 2 models\n",
    " * Multi-Armed Bandit over 3 models\n",
    " \n",
    "### Dependencies\n",
    " \n",
    "  * Tensorflow\n",
    "  * grpcio package\n",
    " "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup\n",
    "\n",
    "Set kubectl to use the namespace where you installed kubeflow and seldon. In the README it is kubeflow."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl config set-context $(kubectl config current-context) --namespace=kubeflow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!make create_protos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python -m grpc.tools.protoc -I. --python_out=. --grpc_python_out=. ./proto/prediction.proto"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import utils\n",
    "from visualizer import get_graph\n",
    "mnist = utils.download_mnist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "\n",
    "**Ensure you have port forwarded the ambassador reverse proxy**\n",
    "\n",
    "```bash\n",
    "kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8002:80\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deploy Single Tensorflow Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_graph(\"../k8s_serving/serving_model.json\",'r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../k8s_serving/serving_model.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl apply -f ../k8s_serving/serving_model.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "utils.predict_rest_mnist(mnist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "utils.predict_grpc_mnist(mnist)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Start load test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl label nodes $(kubectl get nodes -o jsonpath='{.items[0].metadata.name}') role=locust"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!helm install seldon-core-loadtesting --name loadtest  \\\n",
    "    --namespace kubeflow \\\n",
    "    --repo https://storage.googleapis.com/seldon-charts \\\n",
    "    --set locust.script=mnist_rest_locust.py \\\n",
    "    --set locust.host=http://mnist-classifier:8000 \\\n",
    "    --set oauth.enabled=false \\\n",
    "    --set oauth.key=oauth-key \\\n",
    "    --set oauth.secret=oauth-secret \\\n",
    "    --set locust.hatchRate=1 \\\n",
    "    --set locust.clients=1 \\\n",
    "    --set loadtest.sendFeedback=1 \\\n",
    "    --set locust.minWait=0 \\\n",
    "    --set locust.maxWait=0 \\\n",
    "    --set replicaCount=1 \\\n",
    "    --set data.size=784\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Rolling update to AB Test\n",
    " Run an AB Test between 2 models:\n",
    "  * Tensorflow neural network model\n",
    "  * Scikit-learn random forest.\n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_graph(\"../k8s_serving/ab_test_sklearn_tensorflow.json\",'r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../k8s_serving/ab_test_sklearn_tensorflow.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl apply -f ../k8s_serving/ab_test_sklearn_tensorflow.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "utils.predict_rest_mnist(mnist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "utils.evaluate_abtest(mnist,100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Rolling Update to Multi-Armed Bandit\n",
    "Run a epsilon-greey multi-armed bandit over 3 models:\n",
    "  * Tensorflow neural network model\n",
    "  * Scikit-learn random forest model\n",
    "  * R least-squares model\n",
    "  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_graph(\"../k8s_serving/epsilon_greedy_3way.json\",'r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../k8s_serving/epsilon_greedy_3way.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl apply -f ../k8s_serving/epsilon_greedy_3way.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "utils.predict_rest_mnist(mnist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "utils.evaluate_egreedy(mnist,100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: notebooks/training.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train Various Models on MNIST using kubeflow and seldon-core\n",
    "\n",
    "Using:\n",
    "\n",
    " * kubeflow\n",
    " * seldon-core\n",
    " \n",
    "The example will be the MNIST handwritten digit classification task.\n",
    "\n",
    "![MNIST](mnist.png \"MNIST Digits\")\n",
    "\n",
    "### Dependencies\n",
    "\n",
    "  * Argo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl config set-context $(kubectl config current-context) --namespace=kubeflow"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tensorflow Model\n",
    " A simple neural network in Tensorflow."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training\n",
    " * Create image from source\n",
    " * Run training\n",
    " \n",
    "\n",
    "Run with:\n",
    "  * ``` -p build-push-image=true``` to build image and push to repo, needed extra params:\n",
    "    * ``` -p version=<version>``` create ```<version>``` of model\n",
    "    * ``` -p github-user=<github-user>``` to download example-seldon source from ```<github-user>``` account\n",
    "    * ``` -p github-revision=<revision>``` to use the github branch ```<revision>```\n",
    "    * ``` -p docker-org=<docker-org>``` to use Docker repo ```<docker-org>``` to push image to. Needs docker credentials in secret as described in README."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../workflows/training-tf-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo submit ../workflows/training-tf-mnist-workflow.yaml -p tfjob-version-hack=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Runtime Image\n",
    "\n",
    "Run with:\n",
    "  * ``` -p build-push-image=true``` to build image and push to repo, needed extra params:\n",
    "    * ``` -p version=<version>``` create ```<version>``` of model\n",
    "    * ``` -p github-user=<github-user>``` to download example-seldon source from ```<github-user>``` account\n",
    "    * ``` -p github-revision=<revision>``` to use the github branch ```<revision>```\n",
    "    * ``` -p docker-org=<docker-org>``` to use Docker user ```<docker-org>``` to push image to. Needs docker credentials in secret as described in README.\n",
    "  * ``` -p deploy-model=true``` to deploy model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../workflows/serving-tf-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo submit ../workflows/serving-tf-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sklearn Model\n",
    "A Random forest in sklearn."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training\n",
    "\n",
    " * For options see above Tensorflow example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../workflows/training-sk-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo submit ../workflows/training-sk-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Runtime Image\n",
    " * For options see above Tensorflow example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../workflows/serving-sk-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo submit ../workflows/serving-sk-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# R Model\n",
    "A partial least squares model in R."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training\n",
    "\n",
    " * For options see above Tensorflow example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../workflows/training-r-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo submit ../workflows/training-r-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Runtime Image\n",
    " * For options see above Tensorflow example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pygmentize ../workflows/serving-r-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo submit ../workflows/serving-r-mnist-workflow.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!argo list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: notebooks/utils.py
================================================
import requests
from requests.auth import HTTPBasicAuth
from random import randint,random
from proto import prediction_pb2
from proto import prediction_pb2_grpc
import grpc
import json
from visualizer import get_graph
from matplotlib import pyplot as plt
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from google.protobuf.json_format import MessageToJson

AMBASSADOR_API_IP="localhost:8002"

def rest_request(deploymentName,request):
    """Send a prediction request through Ambassador and return the decoded reply.

    Args:
        deploymentName: Name of the Seldon deployment; becomes part of the URL path.
        request: JSON-serialisable payload posted as the request body.

    Returns:
        The response body parsed from JSON.
    """
    endpoint = "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/predictions"
    reply = requests.post(endpoint, json=request)
    return reply.json()
    
def rest_request_auth(deploymentName,data,username,password):
    """Send a prediction request using HTTP basic authentication.

    Args:
        deploymentName: Name of the Seldon deployment; becomes part of the URL path.
        data: Array-like exposing ``tolist()``; sent as the ``ndarray`` payload.
        username: Basic-auth user name.
        password: Basic-auth password.

    Returns:
        The response body parsed from JSON. The HTTP status code is printed
        before the body is decoded.
    """
    body = {"data":{"ndarray":data.tolist()}}
    endpoint = "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/predictions"
    reply = requests.post(endpoint,
                          json=body,
                          auth=HTTPBasicAuth(username, password))
    print(reply.status_code)
    return reply.json()

def grpc_request(deploymentName,data):
    """Send a prediction request over gRPC through Ambassador.

    Args:
        deploymentName: Seldon deployment name, passed as the ``seldon``
            call-metadata entry.
        data: Flat sequence of values placed in a tensor declared with
            shape ``[1, 784]``.

    Returns:
        The message returned by the ``Predict`` RPC.
    """
    tensor = prediction_pb2.Tensor(shape = [1,784], values = data)
    payload = prediction_pb2.DefaultData(names = ["a","b"], tensor = tensor)
    message = prediction_pb2.SeldonMessage(data = payload)
    # Plain-text channel to the port-forwarded Ambassador proxy.
    stub = prediction_pb2_grpc.SeldonStub(grpc.insecure_channel(AMBASSADOR_API_IP))
    call_metadata = [('seldon',deploymentName)]
    return stub.Predict(request=message,metadata=call_metadata)

def send_feedback_rest(deploymentName,request,response,reward):
    """Post feedback about an earlier prediction to the deployment.

    Args:
        deploymentName: Name of the Seldon deployment; becomes part of the URL path.
        request: The prediction request that was originally sent.
        response: The prediction response that was received for it.
        reward: Numeric reward attached to the request/response pair.

    Returns:
        The raw text body of the feedback endpoint's reply.
    """
    endpoint = "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/feedback"
    body = {"request": request, "response": response, "reward": reward}
    return requests.post(endpoint, json=body).text


def gen_image(arr):
    """Draw ``arr`` as a 28x28 greyscale image and return the pyplot module.

    The values are reshaped to 28x28, multiplied by 255 and cast to uint8
    before being handed to ``plt.imshow``.
    """
    scaled = np.reshape(arr, (28, 28)) * 255
    pixels = scaled.astype(np.uint8)
    plt.imshow(pixels, cmap=plt.cm.gray_r, interpolation='nearest')
    return plt

def download_mnist():
    """Load the MNIST data sets from ``MNIST_data/`` with one-hot labels."""
    datasets = input_data.read_data_sets("MNIST_data/", one_hot = True)
    return datasets


def predict_rest_mnist(mnist):
    """Show one training digit and print the REST prediction for it.

    Args:
        mnist: Data set object exposing ``train.next_batch``.
    """
    images, _labels = mnist.train.next_batch(1)
    digit = images[0]
    gen_image(digit).show()
    # Feature names are X1 .. X784, one per pixel.
    feature_names = ["X"+str(n) for n in range(1,785)]
    payload = {"data":{"names":feature_names,"ndarray":digit.reshape((1,784)).tolist()}}
    predictions = rest_request("mnist-classifier",payload)
    print(json.dumps(predictions,indent=2))
    first_row = predictions["data"]["ndarray"][0]
    rounded = list(map(lambda p: '%.2f' % p, first_row))
    by_name = dict(zip(predictions["data"]["names"],rounded))
    print("Returned probabilities")
    print(json.dumps(by_name,indent=2))


def predict_grpc_mnist(mnist):
    """Show one training digit and print the gRPC prediction for it.

    Args:
        mnist: Data set object exposing ``train.next_batch``.
    """
    images, _labels = mnist.train.next_batch(1)
    digit = images[0]
    gen_image(digit).show()
    reply = grpc_request("mnist-classifier",digit.reshape((784)))
    # Round-trip through JSON to get plain dicts and lists for printing.
    predictions = json.loads(MessageToJson(reply))
    print(json.dumps(predictions,indent=2))    
    values = predictions["data"]["tensor"]["values"]
    rounded = list(map(lambda p: '%.2f' % p, values))
    by_name = dict(zip(predictions["data"]["names"],rounded))
    print("Returned probabilities")    
    print(json.dumps(by_name,indent=2))

def evaluate_abtest(mnist,sz=100):
    """Send ``sz`` digits to the AB-test deployment and plot the branch taken.

    Args:
        mnist: Data set object exposing ``train.next_batch``.
        sz: Number of requests to send.
    """
    images, _labels = mnist.train.next_batch(sz)
    chosen_branches = []
    for step in range(sz):
        # Progress marker every ten requests.
        if not step % 10:
            print("{}/{}".format(step,sz))
        payload = {"data":{"ndarray":images[step].reshape((1,784)).tolist()}}
        reply = rest_request("mnist-classifier",payload)
        routing = reply.get("meta").get("routing")
        chosen_branches.append(routing.get("random-ab-test"))

    plt.figure(figsize=(15,6))
    points = plt.scatter(range(len(chosen_branches)),chosen_branches)
    points.axes.xaxis.set_label_text("Incoming Requests over Time")
    points.axes.yaxis.set_label_text("Selected Branch")
    plt.yticks([0,1,2])
    _ = plt.title("Branch Chosen for Incoming Requests")


def evaluate_egreedy(mnist,sz=100):
    """Drive the epsilon-greedy router with ``sz`` digits and plot its choices.

    For every digit a prediction is requested, the branch chosen by the
    ``eg-router`` is recorded, and feedback is sent with reward 1 when the
    highest-probability class matches the label and 0 otherwise. The number
    of correct predictions per branch is printed at the end.

    Args:
        mnist: Data set object exposing ``train.next_batch``.
        sz: Number of requests to send. Previously this argument was
            silently overwritten with 100; it is now honoured.
    """
    # One correct-prediction counter per branch of the three-way router.
    score = [0.0,0.0,0.0]
    batch_xs, batch_ys = mnist.train.next_batch(sz)
    routes_history = []
    for idx in range(sz):
        # Progress marker every ten requests.
        if idx % 10 == 0:
            print("{}/{}".format(idx,sz))
        data = batch_xs[idx].reshape((1,784))
        request = {"data":{"ndarray":data.tolist()}}
        response = rest_request("mnist-classifier",request)
        route = response.get("meta").get("routing").get("eg-router")
        proba = response["data"]["ndarray"][0]
        predicted = proba.index(max(proba))
        # Labels are one-hot, so argmax recovers the digit.
        correct = np.argmax(batch_ys[idx])
        reward = 1 if predicted == correct else 0
        if reward:
            score[route] = score[route] + 1
        send_feedback_rest("mnist-classifier",request,response,reward=reward)
        routes_history.append(route)

    plt.figure(figsize=(15,6))
    ax = plt.scatter(range(len(routes_history)),routes_history)
    ax.axes.xaxis.set_label_text("Incoming Requests over Time")
    ax.axes.yaxis.set_label_text("Selected Branch")
    plt.yticks([0,1,2])
    _ = plt.title("Branch Chosen for Incoming Requests")
    print(score)

    
================================================
FILE: notebooks/visualizer.py
================================================
import graphviz
import json

def _populate_graph(dot, root, suffix=''):
    name = root.get("name")
    id = name+suffix
    if root.get("implementation"):
        dot.node(id, label=name, shape="box", style="filled", color="lightgrey")
    else:
        dot.node(id, label=name, shape="box")
    endpoint_type = root.get("endpoint",{}).get("type")
    if endpoint_type is not None:
        dot.node(id+'endpoint', label=endpoint_type)
        dot.edge(id,id+'endpoint')
    for child in root.get("children",[]):
        child_id = _populate_graph(dot,child)
        dot.edge(id, child_id)
    return id

def get_graph(filename,predictor=0):
    """Build a graphviz digraph for a Seldon deployment JSON file.

    The first predictor is drawn in a cluster labelled "predictor"; if a
    second predictor exists it is drawn in a cluster labelled "canary".

    Args:
        filename: Path to the deployment JSON file.
        predictor: Unused; kept so existing callers that pass a second
            argument continue to work.

    Returns:
        The populated ``graphviz.Digraph``.
    """
    # Use a context manager so the file handle is closed promptly.
    with open(filename,'r') as handle:
        deployment = json.load(handle)
    predictors = deployment.get("spec").get("predictors")
    dot = graphviz.Digraph()

    with dot.subgraph(name="cluster_0") as pdot:
        graph = predictors[0].get("graph")
        _populate_graph(pdot, graph, suffix='0')
        pdot.attr(label="predictor")

    if len(predictors)>1:
        with dot.subgraph(name="cluster_1") as cdot:
            graph = predictors[1].get("graph")
            _populate_graph(cdot, graph, suffix='1')
            cdot.attr(label="canary")

    return dot


================================================
FILE: scripts/README.md
================================================
# Create MNIST Demo

 1. You will need all prerequisites (gcloud, kubectl, ks, helm) in your path.
 1. Copy `env-example.sh` to `env.sh` and edit with your own settings
 1. run `create_demo.sh`
 
# Delete Demo

 1. Run `delete-demo.sh`. This deletes the GCP resources except the Filestore disk, which you currently need to delete manually.


================================================
FILE: scripts/create_demo.sh
================================================
#!/usr/bin/env bash
# Sets up the MNIST demo by running the functions defined below in order,
# using the settings sourced from env.sh.

# Strict mode: fail on unset variables, on any failing command, and on a
# failure anywhere in a pipeline.
set -o nounset
set -o errexit
set -o pipefail

# Download the Kubeflow sources for ${KUBEFLOW_TAG} into ${KUBEFLOW_SRC}.
# Leaves the working directory at ${KUBEFLOW_SRC}; later steps rely on that.
create_src() {
    mkdir -p "${KUBEFLOW_SRC}"
    cd "${KUBEFLOW_SRC}"
    # --fail makes curl exit non-zero on an HTTP error instead of piping the
    # error page into bash; pipefail then aborts the script.
    curl --fail "https://raw.githubusercontent.com/kubeflow/kubeflow/${KUBEFLOW_TAG}/scripts/download.sh" | bash
}


# Initialise the kfctl app ${KFAPP} for GCP project ${PROJECT}, then generate
# and apply first the platform and then the k8s resources.
# Runs relative to the current directory and leaves it at ${KFAPP}.
launch_kubeflow() {

    KUBEFLOW_REPO="${KUBEFLOW_SRC}" "${KUBEFLOW_SRC}/scripts/kfctl.sh" init "${KFAPP}" --platform gcp --project "${PROJECT}"

    cd "${KFAPP}"
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" generate platform
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" apply platform
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" generate k8s
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" apply k8s

}

# Install, generate and apply the kubeflow/seldon ksonnet package from the
# ks_app directory of the kfctl app.
launch_seldon() {
    cd "${KUBEFLOW_SRC}/${KFAPP}/ks_app"

    ks pkg install kubeflow/seldon
    ks generate seldon seldon
    ks apply default -c seldon
}

# Create a cluster-admin "tiller" service account in kube-system, initialise
# Helm with it and wait for the tiller deployment to finish rolling out.
add_helm() {
    kubectl create serviceaccount tiller --namespace kube-system
    kubectl create clusterrolebinding tiller \
        --clusterrole cluster-admin \
        --serviceaccount=kube-system:tiller
    helm init --service-account tiller
    kubectl rollout status deployment/tiller-deploy --namespace kube-system
}

# Ensure the Filestore instance ${FS} exists, install the NFS client
# provisioner pointing at it, and create the demo's PersistentVolumeClaim.
add_nfs_disk() {

    # The describe call fails when the instance does not exist yet, so
    # errexit is suspended while probing and (if needed) creating it.
    set +e
    FSADDR=$(gcloud beta filestore instances describe "${FS}" --project="${PROJECT}" --location="${ZONE}" --format="value(networks.ipAddresses[0])")
    if [ -z "$FSADDR" ]; then
        echo "Creating filestore NFS volume"
        gcloud beta filestore instances create "${FS}"     --project="${PROJECT}"     --location="${ZONE}"     --tier=STANDARD     --file-share=name="volumes",capacity=1TB     --network=name="default",reserved-ip-range="10.0.0.0/29"
    fi
    set -e

    # Query again under errexit: by now the instance must exist.
    FSADDR=$(gcloud beta filestore instances describe "${FS}" --project="${PROJECT}" --location="${ZONE}" --format="value(networks.ipAddresses[0])")

    helm install stable/nfs-client-provisioner --name nfs-cp --set nfs.server="${FSADDR}" --set nfs.path=/volumes
    kubectl rollout status  deploy/nfs-cp-nfs-client-provisioner -n kubeflow

    kubectl apply -f "${STARTUP_DIR}/nfs-pvc.yaml" -n kubeflow
}

# Grant cluster-admin to the active gcloud account and to the default
# service account of the kubeflow namespace.
add_argo_clusterrole() {
    kubectl create clusterrolebinding my-cluster-admin-binding --clusterrole=cluster-admin --user="$(gcloud info --format="value(config.account)")"
    kubectl create clusterrolebinding default-admin2 --clusterrole=cluster-admin --serviceaccount=kubeflow:default

}

# Install the seldon-core-analytics chart into the kubeflow namespace with
# persistence disabled.
# NOTE(review): the Grafana/Prometheus admin password is hard-coded here.
add_seldon_analytics() {
    helm install seldon-core-analytics \
        --name seldon-core-analytics \
        --set grafana_prom_admin_password=password \
        --set persistence.enabled=false \
        --repo https://storage.googleapis.com/seldon-charts \
        --namespace kubeflow
}

# Load the user's settings, then run every provisioning step in order.
# Stop explicitly when env.sh is missing rather than carrying on to the
# failing `source` below.
if [ ! -f env.sh ]; then
    echo "Create env.sh by copying env-example.sh" >&2
    exit 1
fi
source env.sh
create_src
launch_kubeflow
launch_seldon
add_helm
add_nfs_disk
add_argo_clusterrole
add_seldon_analytics


================================================
FILE: scripts/delete-demo.sh
================================================
#!/usr/bin/env bash
# Tears down the demo by running `kfctl.sh delete all` inside the kfctl app
# directory, using the settings sourced from env.sh.

# Strict mode: fail on unset variables, on any failing command, and on a
# failure anywhere in a pipeline.
set -o nounset
set -o errexit
set -o pipefail


# Stop explicitly when env.sh is missing rather than carrying on to the
# failing `source` below.
if [ ! -f env.sh ]; then
    echo "Create env.sh by copying env-example.sh" >&2
    exit 1
fi
source env.sh

cd "${KUBEFLOW_SRC}/${KFAPP}"
"${KUBEFLOW_SRC}/scripts/kfctl.sh"  delete all


================================================
FILE: scripts/env-example.sh
================================================
# Example settings for create_demo.sh and delete-demo.sh; copy to env.sh and edit.

# Absolute directory of the running script ($0); when this file is sourced,
# $0 is the script that sourced it.
STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )"
# Name of the kfctl application directory to create.
KFAPP=my-kubeflow    
# GCP project passed to kfctl and gcloud.
PROJECT=seldon-demos
# Where the Kubeflow sources are downloaded.
KUBEFLOW_SRC=${STARTUP_DIR}/kubeflow_src
# Name of the Filestore instance used for the NFS volume.
FS=mnist-data
# Location passed to the gcloud filestore commands.
ZONE=europe-west1-b
# Next two lines are set from values created as discussed in https://www.kubeflow.org/docs/started/getting-started-gke/
# NOTE: replace the angle-bracket placeholders before use; the shell will not
# accept them as written.
export CLIENT_ID=<your-client-id>
export CLIENT_SECRET=<your-secret>
# Kubeflow git tag whose download script is fetched.
export KUBEFLOW_TAG=v0.5.1


================================================
FILE: scripts/nfs-pvc.yaml
================================================
# PersistentVolumeClaim applied by create_demo.sh into the kubeflow namespace.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-1
spec:
  accessModes:
    # May be mounted read-write by many nodes at once.
    - ReadWriteMany
  # NOTE(review): presumably the storage class provided by the
  # nfs-client-provisioner chart installed in create_demo.sh — confirm.
  storageClassName: nfs-client
  resources:
    requests:
      storage: 30Gi


================================================
FILE: scripts/port-forwards.sh
================================================

# Each forward below runs in the background (&) and targets the first pod in
# the kubeflow namespace that matches the given label.

# Argo UI: local port 8001 -> pod port 8001
kubectl port-forward $(kubectl get pods -n kubeflow -l app=argo-ui -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8001:8001 &

# Seldon Grafana: local port 3000 -> pod port 3000
kubectl port-forward $(kubectl get pods -n kubeflow -l app=grafana-prom-server -o jsonpath='{.items[0].metadata.name}') -n kubeflow 3000:3000 &

# Ambassador reverse proxy: local port 8002 -> pod port 80
kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8002:80 &

# Ambassador admin: local port 8877 -> pod port 8877
kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8877:8877 &


================================================
FILE: scripts/watch-mnist.sh
================================================
# Continuously list the pods labelled seldon-app=mnist-classifier.
watch kubectl get pods -l seldon-app=mnist-classifier


================================================
FILE: workflows/serving-r-mnist-workflow.yaml
================================================
# Argo workflow that packages the R MNIST runtime model and serves it with
# Seldon.
#
# Steps, run in order:
#   1. get-source: clone this repo as a git input artifact at
#      /src/example-seldon and copy models/r_mnist/runtime into the workspace
#      volume.
#   2. build-push: build the runtime image with kaniko and push it
#      (runs only when build-push-image is true).
#   3. serve: apply a SeldonDeployment that runs the image
#      (runs only when deploy-model is true).
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: seldon-r-deploy-
spec:
  entrypoint: workflow
  arguments:
    # Parameter values are strings; number- and boolean-looking defaults are
    # quoted so YAML does not retype them (e.g. 0.10 would become 0.1).
    parameters:
    - name: version
      value: "0.1"
    - name: github-user
      value: kubeflow
    - name: github-revision
      value: master
    - name: docker-org
      value: index.docker.io/seldonio
    - name: build-push-image
      value: "false"
    - name: deploy-model
      value: "false"
  volumes:
  - name: docker-config
    secret:
      secretName: docker-config     # name of an existing k8s secret
  # Scratch volume shared by the steps; it carries the sources from get-source
  # to build-push.
  volumeClaimTemplates:
  - metadata:
      name: workspace
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 0.5Gi
  templates:
  # Entry point: the three steps run sequentially.
  - name: workflow
    steps:
    - - name: get-source
        template: get-source-code
    - - name: build-push
        template: build-and-push
        when: "{{workflow.parameters.build-push-image}} == true"
    - - name: serve
        template: seldon
        when: "{{workflow.parameters.deploy-model}} == true"
  # Clone the repo and copy the runtime model files into the workspace volume.
  - name: get-source-code
    inputs:
      artifacts:
      - name: argo-source
        path: /src/example-seldon
        git:
          repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git
          revision: "{{workflow.parameters.github-revision}}"
    container:
      image: alpine:latest
      command: [sh, -c]
      args: ["cp /src/example-seldon/models/r_mnist/runtime/* /workspace/; ls /workspace/"]
      volumeMounts:
      - name: workspace
        mountPath: /workspace
  # Build the runtime image with kaniko and push it to docker-org; the
  # docker-config secret is mounted at /root/.docker/.
  # NOTE(review): this step has no git artifact, so /src is not populated here;
  # the sources live on the workspace volume mounted at /workspace. Presumably
  # kaniko falls back to /workspace as its build context - confirm.
  - name: build-and-push
    container:
      image: gcr.io/kaniko-project/executor:latest
      args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/rmnistclassifier_runtime:{{workflow.parameters.version}}"]
      workingDir: /src/example-seldon/models/r_mnist/runtime/
      volumeMounts:
      - name: docker-config
        mountPath: "/root/.docker/"
      - name: workspace
        mountPath: /workspace
  # Apply the SeldonDeployment that serves the image as a single REST MODEL
  # node, with the nfs-1 claim mounted at /data.
  - name: seldon
    resource:                   # marks this as a resource template
      action: apply             # can be any kubectl action (e.g. create, delete, apply, patch)
      #successCondition: ?
      manifest: |   # the SeldonDeployment to apply
       apiVersion: "machinelearning.seldon.io/v1alpha2"
       kind: "SeldonDeployment"
       metadata:
         labels:
           app: "seldon"
         name: "mnist-classifier"
       spec:
         annotations:
           deployment_version: "v1"
           project_name: "MNIST Example"
         name: "mnist-classifier"
         predictors:
           -
             annotations:
               predictor_version: "v1"
             componentSpecs:
               -
                 spec:
                   containers:
                     -
                       image: "{{workflow.parameters.docker-org}}/rmnistclassifier_runtime:{{workflow.parameters.version}}"
                       imagePullPolicy: "Always"
                       name: "mnist-classifier"
                       volumeMounts:
                         -
                           mountPath: "/data"
                           name: "persistent-storage"
                   terminationGracePeriodSeconds: 1
                   volumes:
                     -
                       name: "persistent-storage"
                       volumeSource:
                         persistentVolumeClaim:
                           claimName: "nfs-1"
             graph:
               children: []
               endpoint:
                 type: "REST"
               name: "mnist-classifier"
               type: "MODEL"
             name: "mnist-classifier"
             replicas: 1


================================================
FILE: workflows/serving-sk-mnist-workflow.yaml
================================================
# Argo workflow that packages the scikit-learn MNIST runtime model and serves
# it with Seldon.
#
# Steps, run in order:
#   1. get-source: clone this repo as a git input artifact at
#      /src/example-seldon and copy models/sk_mnist/runtime into the workspace
#      volume.
#   2. build-push: build the runtime image with kaniko and push it
#      (runs only when build-push-image is true).
#   3. serve: apply a SeldonDeployment that runs the image
#      (runs only when deploy-model is true).
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: seldon-sk-deploy-
spec:
  entrypoint: workflow
  arguments:
    # Parameter values are strings; number- and boolean-looking defaults are
    # quoted so YAML does not retype them (e.g. 0.10 would become 0.1).
    parameters:
    - name: version
      value: "0.1"
    - name: github-user
      value: kubeflow
    - name: github-revision
      value: master
    - name: docker-org
      value: index.docker.io/seldonio
    - name: build-push-image
      value: "false"
    - name: deploy-model
      value: "false"
  volumes:
  - name: docker-config
    secret:
      secretName: docker-config     # name of an existing k8s secret
  # Scratch volume shared by the steps; it carries the sources from get-source
  # to build-push.
  volumeClaimTemplates:
  - metadata:
      name: workspace
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 0.5Gi
  templates:
  # Entry point: the three steps run sequentially.
  - name: workflow
    steps:
    - - name: get-source
        template: get-source-code
    - - name: build-push
        template: build-and-push
        when: "{{workflow.parameters.build-push-image}} == true"
    - - name: serve
        template: seldon
        when: "{{workflow.parameters.deploy-model}} == true"
  # Clone the repo and copy the runtime model files into the workspace volume.
  - name: get-source-code
    inputs:
      artifacts:
      - name: argo-source
        path: /src/example-seldon
        git:
          repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git
          revision: "{{workflow.parameters.github-revision}}"
    container:
      image: alpine:latest
      command: [sh, -c]
      args: ["cp /src/example-seldon/models/sk_mnist/runtime/* /workspace/; ls /workspace/"]
      volumeMounts:
      - name: workspace
        mountPath: /workspace
  # Build the runtime image with kaniko and push it to docker-org; the
  # docker-config secret is mounted at /root/.docker/.
  # NOTE(review): this step has no git artifact, so /src is not populated here;
  # the sources live on the workspace volume mounted at /workspace. Presumably
  # kaniko falls back to /workspace as its build context - confirm.
  - name: build-and-push
    container:
      image: gcr.io/kaniko-project/executor:latest
      args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/skmnistclassifier_runtime:{{workflow.parameters.version}}"]
      workingDir: /src/example-seldon/models/sk_mnist/runtime/
      volumeMounts:
      - name: docker-config
        mountPath: "/root/.docker/"
      - name: workspace
        mountPath: /workspace
  # Apply the SeldonDeployment that serves the image as a single REST MODEL
  # node, with the nfs-1 claim mounted at /data.
  - name: seldon
    resource:                   # marks this as a resource template
      action: apply             # can be any kubectl action (e.g. create, delete, apply, patch)
      #successCondition: ?
      manifest: |   # the SeldonDeployment to apply
       apiVersion: "machinelearning.seldon.io/v1alpha2"
       kind: "SeldonDeployment"
       metadata:
         labels:
           app: "seldon"
         name: "mnist-classifier"
       spec:
         annotations:
           deployment_version: "v1"
           project_name: "MNIST Example"
         name: "mnist-classifier"
         predictors:
           -
             annotations:
               predictor_version: "v1"
             componentSpecs:
               -
                 spec:
                   containers:
                     -
                       image: "{{workflow.parameters.docker-org}}/skmnistclassifier_runtime:{{workflow.parameters.version}}"
                       imagePullPolicy: "Always"
                       name: "mnist-classifier"
                       volumeMounts:
                         -
                           mountPath: "/data"
                           name: "persistent-storage"
                   terminationGracePeriodSeconds: 1
                   volumes:
                     -
                       name: "persistent-storage"
                       volumeSource:
                         persistentVolumeClaim:
                           claimName: "nfs-1"
             graph:
               children: []
               endpoint:
                 type: "REST"
               name: "mnist-classifier"
               type: "MODEL"
             name: "mnist-classifier"
             replicas: 1


================================================
FILE: workflows/serving-tf-mnist-workflow.md
================================================
# Example Argo Workflow to dockerize runtime model and deploy it for serving

Comments on the [serving-tf-mnist-workflow.yaml](serving-tf-mnist-workflow.yaml)

## Workflow Summary

To serve our runtime model we create:

 * [```models/tf_mnist/runtime/Dockerfile```](../models/tf_mnist/runtime/Dockerfile) to wrap the model using the seldon-core Python wrapper.
 * An Argo workflow to:
    * Wraps the runtime model, builds a Docker image for it, and optionally pushes it to your repo
    * Optionally starts a Seldon deployment that will run and expose your model


## Workflow parameters

 * version
   * The version tag for the Docker image
 * github-user
   * The github user to use to clone this repo/fork
 * github-revision
   * The github revision to use for cloning the repo (can be a branch name)
 * docker-org
   * The Docker host and org/user/project to use when pushing an image to the registry
 * build-push-image
   * Whether to build and push the image to docker registry (true/false)
 * deploy-model
   * Whether to start a seldon deployment to run and expose your model (true/false)


================================================
FILE: workflows/serving-tf-mnist-workflow.yaml
================================================
# Argo workflow that packages the TensorFlow MNIST runtime model and serves it
# with Seldon.
#
# Steps, run in order:
#   1. get-source: clone this repo as a git input artifact at
#      /src/example-seldon and copy models/tf_mnist/runtime into the workspace
#      volume.
#   2. build-push: build the runtime image with kaniko and push it
#      (runs only when build-push-image is true).
#   3. serve: apply a SeldonDeployment that runs the image
#      (runs only when deploy-model is true).
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: seldon-tf-deploy-
spec:
  entrypoint: workflow
  arguments:
    # Parameter values are strings; number- and boolean-looking defaults are
    # quoted so YAML does not retype them (e.g. 0.10 would become 0.1).
    parameters:
    - name: version
      value: "0.1"
    - name: github-user
      value: kubeflow
    - name: github-revision
      value: master
    - name: docker-org
      value: index.docker.io/seldonio
    - name: build-push-image
      value: "false"
    - name: deploy-model
      value: "false"
  volumes:
  - name: docker-config
    secret:
      secretName: docker-config     # name of an existing k8s secret
  # Scratch volume shared by the steps; it carries the sources from get-source
  # to build-push.
  volumeClaimTemplates:
  - metadata:
      name: workspace
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 0.5Gi
  templates:
  # Entry point: the three steps run sequentially.
  - name: workflow
    steps:
    - - name: get-source
        template: get-source-code
    - - name: build-push
        template: build-and-push
        when: "{{workflow.parameters.build-push-image}} == true"
    - - name: serve
        template: seldon
        when: "{{workflow.parameters.deploy-model}} == true"
  # Clone the repo and copy the runtime model files into the workspace volume.
  - name: get-source-code
    inputs:
      artifacts:
      - name: argo-source
        path: /src/example-seldon
        git:
          repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git
          revision: "{{workflow.parameters.github-revision}}"
    container:
      image: alpine:latest
      command: [sh, -c]
      args: ["cp /src/example-seldon/models/tf_mnist/runtime/* /workspace/; ls /workspace/"]
      volumeMounts:
      - name: workspace
        mountPath: /workspace
  # Build the runtime image with kaniko and push it to docker-org; the
  # docker-config secret is mounted at /root/.docker/.
  # NOTE(review): this step has no git artifact, so /src is not populated here;
  # the sources live on the workspace volume mounted at /workspace. Presumably
  # kaniko falls back to /workspace as its build context - confirm.
  - name: build-and-push
    container:
      image: gcr.io/kaniko-project/executor:latest
      args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/deepmnistclassifier_runtime:{{workflow.parameters.version}}"]
      workingDir: /src/example-seldon/models/tf_mnist/runtime/
      volumeMounts:
      - name: docker-config
        mountPath: "/root/.docker/"
      - name: workspace
        mountPath: /workspace
  # Apply the SeldonDeployment that serves the image as a single REST MODEL
  # node, with the nfs-1 claim mounted at /data.
  - name: seldon
    resource:                   # marks this as a resource template
      action: apply             # can be any kubectl action (e.g. create, delete, apply, patch)
      #successCondition: ?
      manifest: |   # the SeldonDeployment to apply
       apiVersion: "machinelearning.seldon.io/v1alpha2"
       kind: "SeldonDeployment"
       metadata:
         labels:
           app: "seldon"
         name: "mnist-classifier"
       spec:
         annotations:
           deployment_version: "v1"
           project_name: "MNIST Example"
         name: "mnist-classifier"
         predictors:
           -
             annotations:
               predictor_version: "v1"
             componentSpecs:
               -
                 spec:
                   containers:
                     -
                       image: "{{workflow.parameters.docker-org}}/deepmnistclassifier_runtime:{{workflow.parameters.version}}"
                       imagePullPolicy: "Always"
                       name: "mnist-classifier"
                       volumeMounts:
                         -
                           mountPath: "/data"
                           name: "persistent-storage"
                   terminationGracePeriodSeconds: 1
                   volumes:
                     -
                       name: "persistent-storage"
                       volumeSource:
                         persistentVolumeClaim:
                           claimName: "nfs-1"
             graph:
               children: []
               endpoint:
                 type: "REST"
               name: "mnist-classifier"
               type: "MODEL"
             name: "mnist-classifier"
             replicas: 1


================================================
FILE: workflows/training-r-mnist-workflow.yaml
================================================
# Argo workflow that builds the R MNIST trainer image and runs it as a
# Kubernetes Job.
#
# Steps, run in order:
#   1. get-source: clone this repo as a git input artifact at
#      /src/example-seldon and copy models/r_mnist/train into the workspace
#      volume.
#   2. build-push: build the trainer image with kaniko and push it
#      (runs only when build-push-image is true).
#   3. train: create a batch/v1 Job that runs the trainer image with the nfs-1
#      claim mounted at /data.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-r-train-
spec:
  entrypoint: workflow
  arguments:
    # Parameter values are strings; number- and boolean-looking defaults are
    # quoted so YAML does not retype them (e.g. 0.10 would become 0.1).
    parameters:
    - name: version
      value: "0.1"
    - name: github-user
      value: kubeflow
    - name: github-revision
      value: master
    # Registry host plus org, matching the default used by the other workflows.
    - name: docker-org
      value: index.docker.io/seldonio
    - name: build-push-image
      value: "false"
  volumes:
  - name: docker-config
    secret:
      secretName: docker-config     # name of an existing k8s secret
  # Scratch volume shared by the steps; it carries the sources from get-source
  # to build-push.
  volumeClaimTemplates:
  - metadata:
      name: workspace
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 0.5Gi
  templates:
  # Entry point: the three steps run sequentially.
  - name: workflow
    steps:
    - - name: get-source
        template: get-source-code
    - - name: build-push
        template: build-and-push
        when: "{{workflow.parameters.build-push-image}} == true"
    - - name: train
        template: tfjob
  # Clone the repo and copy the training files into the workspace volume.
  - name: get-source-code
    inputs:
      artifacts:
      - name: argo-source
        path: /src/example-seldon
        git:
          repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git
          revision: "{{workflow.parameters.github-revision}}"
    container:
      image: alpine:latest
      command: [sh, -c]
      args: ["cp /src/example-seldon/models/r_mnist/train/* /workspace/; ls /workspace/"]
      volumeMounts:
      - name: workspace
        mountPath: /workspace
  # Build the trainer image with kaniko and push it to docker-org; the
  # docker-config secret is mounted at /root/.docker/.
  # NOTE(review): this step has no git artifact, so /src is not populated here;
  # the sources live on the workspace volume mounted at /workspace. Presumably
  # kaniko falls back to /workspace as its build context - confirm.
  - name: build-and-push
    container:
      image: gcr.io/kaniko-project/executor:latest
      args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/rmnistclassifier_trainer:{{workflow.parameters.version}}"]
      workingDir: /src/example-seldon/models/r_mnist/train/
      volumeMounts:
      - name: docker-config
        mountPath: "/root/.docker/"
      - name: workspace
        mountPath: /workspace
  # Despite the template name, this creates a plain batch/v1 Job. The step
  # succeeds once the Job reports one succeeded pod; the Job is owned by the
  # workflow through ownerReferences.
  - name: tfjob
    resource:                   # marks this as a resource template
      action: create            # can be any kubectl action (e.g. create, delete, apply, patch)
      successCondition: status.succeeded == 1
      manifest: |   # the training Job to create
       apiVersion: "batch/v1"
       kind: "Job"
       metadata:
         name: "r-train"
         ownerReferences:
         - apiVersion: argoproj.io/v1alpha1
           kind: Workflow
           controller: true
           name: {{workflow.name}}
           uid: {{workflow.uid}}
       spec:
         template:
           metadata:
             name: "r-train"
           spec:
             containers:
               -
                 image: "{{workflow.parameters.docker-org}}/rmnistclassifier_trainer:{{workflow.parameters.version}}"
                 name: "r-train"
                 volumeMounts:
                   -
                     mountPath: "/data"
                     name: "persistent-storage"
             restartPolicy: "Never"
             volumes:
               -
                 name: "persistent-storage"
                 persistentVolumeClaim:
                   claimName: "nfs-1"


================================================
FILE: workflows/training-sk-mnist-workflow.yaml
================================================
# Argo workflow that builds the scikit-learn MNIST trainer image and runs it
# as a Kubernetes Job.
#
# Steps, run in order:
#   1. get-source: clone this repo as a git input artifact at
#      /src/example-seldon and copy models/sk_mnist/train into the workspace
#      volume.
#   2. build-push: build the trainer image with kaniko and push it
#      (runs only when build-push-image is true).
#   3. train: create a batch/v1 Job that runs the trainer image with the nfs-1
#      claim mounted at /data.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-sk-train-
spec:
  entrypoint: workflow
  arguments:
    # Parameter values are strings; number- and boolean-looking defaults are
    # quoted so YAML does not retype them (e.g. 0.10 would become 0.1).
    parameters:
    - name: version
      value: "0.2"
    - name: github-user
      value: kubeflow
    - name: github-revision
      value: master
    - name: docker-org
      value: index.docker.io/seldonio
    - name: build-push-image
      value: "false"
  volumes:
  - name: docker-config
    secret:
      secretName: docker-config     # name of an existing k8s secret
  # Scratch volume shared by the steps; it carries the sources from get-source
  # to build-push.
  volumeClaimTemplates:
  - metadata:
      name: workspace
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 0.5Gi
  templates:
  # Entry point: the three steps run sequentially.
  - name: workflow
    steps:
    - - name: get-source
        template: get-source-code
    - - name: build-push
        template: build-and-push
        when: "{{workflow.parameters.build-push-image}} == true"
    - - name: train
        template: tfjob
  # Clone the repo and copy the training files into the workspace volume.
  - name: get-source-code
    inputs:
      artifacts:
      - name: argo-source
        path: /src/example-seldon
        git:
          repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git
          revision: "{{workflow.parameters.github-revision}}"
    container:
      image: alpine:latest
      command: [sh, -c]
      args: ["cp /src/example-seldon/models/sk_mnist/train/* /workspace/; ls /workspace/"]
      volumeMounts:
      - name: workspace
        mountPath: /workspace
  # Build the trainer image with kaniko and push it to docker-org; the
  # docker-config secret is mounted at /root/.docker/.
  # NOTE(review): this step has no git artifact, so /src is not populated here;
  # the sources live on the workspace volume mounted at /workspace. Presumably
  # kaniko falls back to /workspace as its build context - confirm.
  - name: build-and-push
    container:
      image: gcr.io/kaniko-project/executor:latest
      args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/skmnistclassifier_trainer:{{workflow.parameters.version}}"]
      workingDir: /src/example-seldon/models/sk_mnist/train/
      volumeMounts:
      - name: docker-config
        mountPath: "/root/.docker/"
      - name: workspace
        mountPath: /workspace
  # Despite the template name, this creates a plain batch/v1 Job. The step
  # succeeds once the Job reports one succeeded pod; the Job is owned by the
  # workflow through ownerReferences.
  - name: tfjob
    resource:                   # marks this as a resource template
      action: create            # can be any kubectl action (e.g. create, delete, apply, patch)
      successCondition: status.succeeded == 1
      manifest: |   # the training Job to create
       apiVersion: "batch/v1"
       kind: "Job"
       metadata:
         name: "sk-train"
         ownerReferences:
         - apiVersion: argoproj.io/v1alpha1
           kind: Workflow
           controller: true
           name: {{workflow.name}}
           uid: {{workflow.uid}}
       spec:
         template:
           metadata:
             name: "sk-train"
           spec:
             containers:
               -
                 image: "{{workflow.parameters.docker-org}}/skmnistclassifier_trainer:{{workflow.parameters.version}}"
                 name: "sk-train"
                 imagePullPolicy: Always
                 volumeMounts:
                   -
                     mountPath: "/data"
                     name: "persistent-storage"
             restartPolicy: "Never"
             volumes:
               -
                 name: "persistent-storage"
                 persistentVolumeClaim:
                   claimName: "nfs-1"


================================================
FILE: workflows/training-tf-mnist-workflow.md
================================================
# Example Argo Workflow to dockerize and Train Model

Comments on the [training-tf-mnist-workflow.yaml](training-tf-mnist-workflow.yaml)

## Workflow summary

To dockerize our model training and run it we create:

  * [```models/tf_mnist/train/Dockerfile```](../models/tf_mnist/train/Dockerfile) that defines the image for our TensorFlow training.
  * An Argo workflow [```workflows/training-tf-mnist-workflow.yaml```](training-tf-mnist-workflow.yaml) is created which:
    * Clones the project from github
    * Builds the training image and pushes it to your repo (using kaniko; only when `build-push-image` is true)
    * Starts a kubeflow TfJob to train the model and save the results to the persistent volume


## Workflow parameters

 * version
   * The version tag for the Docker image
 * github-user
   * The github user/org for which to clone this repo/fork
 * github-revision
   * The github revision to use for cloning the repo (can be a branch name)
 * docker-org
   * The Docker host and org/user/project to use when pushing an image to the registry
 * tfjob-version-hack
   * A temporary random integer for the tfjob ID
 * build-push-image
   * Whether to build and push the image to docker registry (true/false)

## Setup For Pushing Images

**To push the Docker images to your own repo you will need to set up your Docker credentials as a Kubernetes secret containing a [config.json](https://www.projectatomic.io/blog/2016/03/docker-credentials-store/). To do this, find your Docker home (typically ~/.docker) and run `kubectl create secret generic docker-config --from-file=config.json=${DOCKERHOME}/config.json --type=kubernetes.io/config` to [create a secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#registry-secret-existing-credentials).**


================================================
FILE: workflows/training-tf-mnist-workflow.yaml
================================================
# Argo workflow that builds the TensorFlow MNIST trainer image and runs it as
# a Kubeflow TFJob.
#
# Steps, run in order:
#   1. get-source: clone this repo as a git input artifact at
#      /src/example-seldon and copy models/tf_mnist/train into the workspace
#      volume.
#   2. build-push: build the trainer image with kaniko and push it
#      (runs only when build-push-image is true).
#   3. train: create a TFJob with one Worker replica that runs the trainer
#      image with the nfs-1 claim mounted at /data.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-tf-train-
spec:
  entrypoint: workflow
  arguments:
    # Parameter values are strings; number- and boolean-looking defaults are
    # quoted so YAML does not retype them (e.g. 0.10 would become 0.1).
    parameters:
    - name: version
      value: "0.1"
    - name: github-user
      value: kubeflow
    - name: github-revision
      value: master
    - name: docker-org
      value: index.docker.io/seldonio
    # Suffix of the TFJob name (mnist-train-<value>).
    - name: tfjob-version-hack
      value: "1"
    - name: build-push-image
      value: "false"
  volumes:
  - name: docker-config
    secret:
      secretName: docker-config     # name of an existing k8s secret
  # Scratch volume shared by the steps; it carries the sources from get-source
  # to build-push.
  volumeClaimTemplates:
  - metadata:
      name: workspace
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 0.5Gi
  templates:
  # Entry point: the three steps run sequentially.
  - name: workflow
    steps:
    - - name: get-source
        template: get-source-code
    - - name: build-push
        template: build-and-push
        when: "{{workflow.parameters.build-push-image}} == true"
    - - name: train
        template: tfjob
  # Clone the repo and copy the training files into the workspace volume.
  - name: get-source-code
    inputs:
      artifacts:
      - name: argo-source
        path: /src/example-seldon
        git:
          repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git
          revision: "{{workflow.parameters.github-revision}}"
    container:
      image: alpine:latest
      command: [sh, -c]
      args: ["cp /src/example-seldon/models/tf_mnist/train/* /workspace/; ls /workspace/"]
      volumeMounts:
      - name: workspace
        mountPath: /workspace
  # Build the trainer image with kaniko and push it to docker-org; the
  # docker-config secret is mounted at /root/.docker/.
  # NOTE(review): this step has no git artifact, so /src is not populated here;
  # the sources live on the workspace volume mounted at /workspace. Presumably
  # kaniko falls back to /workspace as its build context - confirm.
  - name: build-and-push
    container:
      image: gcr.io/kaniko-project/executor:latest
      args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/deepmnistclassifier_trainer:{{workflow.parameters.version}}"]
      workingDir: /src/example-seldon/models/tf_mnist/train/
      volumeMounts:
      - name: docker-config
        mountPath: "/root/.docker/"
      - name: workspace
        mountPath: /workspace
  # Create the TFJob; the step succeeds once the Worker replica status reports
  # one succeeded pod. The TFJob is owned by the workflow through
  # ownerReferences.
  # NOTE(review): the trailing `tfReplicaType: "MASTER"` sits under the Worker
  # replica and looks like a leftover from an older TFJob API - confirm whether
  # kubeflow.org/v1beta1 still reads it.
  - name: tfjob
    resource:                   # marks this as a resource template
      action: create            # can be any kubectl action (e.g. create, delete, apply, patch)
      #successCondition: status.tfReplicaStatuses.Worker.succeeded == 1
      #successCondition: status.conditions.type == Succeeded
      successCondition: status.replicaStatuses.Worker.succeeded == 1
      manifest: |   # the TFJob to create
       apiVersion: "kubeflow.org/v1beta1"
       kind: "TFJob"
       metadata:
         name: mnist-train-{{workflow.parameters.tfjob-version-hack}}
         ownerReferences:
         - apiVersion: argoproj.io/v1alpha1
           kind: Workflow
           controller: true
           name: {{workflow.name}}
           uid: {{workflow.uid}}
       spec:
         tfReplicaSpecs:
           Worker:
             replicas: 1
             template:
               spec:
                 containers:
                   -
                     image: "{{workflow.parameters.docker-org}}/deepmnistclassifier_trainer:{{workflow.parameters.version}}"
                     name: "tensorflow"
                     volumeMounts:
                       -
                         mountPath: "/data"
                         name: "persistent-storage"
                 restartPolicy: "OnFailure"
                 volumes:
                   -
                     name: "persistent-storage"
                     persistentVolumeClaim:
                       claimName: "nfs-1"
             tfReplicaType: "MASTER"