Repository: kubeflow/example-seldon Branch: master Commit: d0e056aaa7ec Files: 69 Total size: 98.7 KB Directory structure: gitextract_39vdm35i/ ├── .gitignore ├── LICENSE ├── OWNERS ├── README.md ├── VERSION ├── k8s_serving/ │ ├── ab_test_sklearn_tensorflow.json │ ├── ambassador-auth-service-config.yaml │ ├── ambassador-auth-service-setup.yaml │ ├── epsilon_greedy.json │ ├── epsilon_greedy_3way.json │ ├── serving_model.json │ ├── serving_r_model.json │ └── serving_sk_model.json ├── k8s_train/ │ ├── sklearn_training_job.yaml │ └── tfJob.json ├── models/ │ ├── r_mnist/ │ │ ├── runtime/ │ │ │ ├── Dockerfile │ │ │ ├── Makefile │ │ │ ├── install.R │ │ │ └── mnist.R │ │ └── train/ │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── get_data.sh │ │ ├── install.R │ │ ├── train.R │ │ └── train.sh │ ├── sk_mnist/ │ │ ├── runtime/ │ │ │ ├── Dockerfile │ │ │ ├── Makefile │ │ │ ├── SkMnist.py │ │ │ ├── contract.json │ │ │ └── requirements.txt │ │ └── train/ │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── create_model.py │ │ ├── requirements.txt │ │ └── train.sh │ └── tf_mnist/ │ ├── runtime/ │ │ ├── DeepMnist.py │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── contract.json │ │ └── requirements.txt │ └── train/ │ ├── Dockerfile │ ├── Makefile │ └── create_model.py ├── nfs.md ├── notebooks/ │ ├── Makefile │ ├── __init__.py │ ├── create-protos.sh │ ├── proto/ │ │ ├── __init__.py │ │ └── prediction.proto │ ├── requirements.txt │ ├── serving.ipynb │ ├── training.ipynb │ ├── utils.py │ └── visualizer.py ├── scripts/ │ ├── README.md │ ├── create_demo.sh │ ├── delete-demo.sh │ ├── env-example.sh │ ├── nfs-pvc.yaml │ ├── port-forwards.sh │ └── watch-mnist.sh └── workflows/ ├── serving-r-mnist-workflow.yaml ├── serving-sk-mnist-workflow.yaml ├── serving-tf-mnist-workflow.md ├── serving-tf-mnist-workflow.yaml ├── training-r-mnist-workflow.yaml ├── training-sk-mnist-workflow.yaml ├── training-tf-mnist-workflow.md └── training-tf-mnist-workflow.yaml ================================================ FILE 
CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # build /target/ /public cluster-manager/.m2/ .ipynb_checkpoints # eclipse .classpath .settings/ .project # Netbeans and IntelliJ files !.gitignore /nbproject /*.ipr /*.iws *.iml .idea /bin/ *~ *.pyc .m2 \#* _*.yaml _*.json models/tf_mnist/runtime/build/ models/sk_mnist/runtime/build/ models/sk_mnist/train/mnist-original.mat notebooks/proto/prediction_pb2.py notebooks/proto/prediction_pb2_grpc.py notebooks/tensorflow scripts/kubeflow_src scripts/env.sh ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: OWNERS ================================================ approvers: - cliveseldon - jinchihe - ryandawsonuk reviewers: - cliveseldon - jinchihe ================================================ FILE: README.md ================================================ ## :warning: **kubeflow/example-seldon is not maintained** This repository has been deprecated and [archived](https://github.com/kubeflow/community/issues/479) on Nov 30th, 2021. # Train and Deploy Machine Learning Models on Kubernetes with Kubeflow and Seldon-Core ![MNIST](notebooks/mnist.png "MNIST Digits") Using: * [kubeflow](https://github.com/kubeflow/kubeflow) * [seldon-core](https://github.com/SeldonIO/seldon-core) The example will be the MNIST handwritten digit classification task. We will train 3 different models to solve this task: * A TensorFlow neural network model. * A scikit-learn random forest model. * An R partial least squares (PLS) model. We will then show various rolling deployments: 1. Deploy the single Tensorflow model. 2. Do a rolling update to an AB test of the Tensorflow model and the sklearn model. 3. Do a rolling update to a Multi-armed Bandit over all 3 models to direct traffic in real time to the best model. In the following we will: 1. [Install kubeflow and seldon-core on a kubernetes cluster](#setup) 1. [Train the models](#train-the-models) 1.
[Serve the models](#serve-the-models) # Requirements * gcloud * kubectl * ksonnet * argo # Setup There is a consolidated script to create the demo which can be found [here](./scripts/README.md). For a step-by-step guide, do the following: 1. [Install kubeflow on GKE](https://www.kubeflow.org/docs/started/getting-started-gke/). This should create kubeflow in a namespace ```kubeflow```. We suggest you use the command line install so you can easily modify your Ksonnet installation. Ensure you have the environment variables `KUBEFLOW_SRC` and `KFAPP` set. OAuth is preferred because, with basic auth, [port-forwarding to ambassador is insufficient](https://github.com/kubeflow/kubeflow/issues/3213). 1. Install seldon. Go to the Ksonnet application folder you set up in the previous step and run ``` cd ${KUBEFLOW_SRC}/${KFAPP}/ks_app ks pkg install kubeflow/seldon ks generate seldon seldon ks apply default -c seldon ``` 1. Install Helm ``` kubectl -n kube-system create sa tiller kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller helm init --service-account tiller kubectl rollout status deploy/tiller-deploy -n kube-system ``` 1. Create an NFS disk and persistent volume claim called `nfs-1`. You can follow a guide on creating an NFS volume using Google Filestore [here](https://cloud.google.com/community/tutorials/gke-filestore-dynamic-provisioning). A consolidated set of steps is shown [here](nfs.md). 1. Add Cluster Roles so Argo can start jobs successfully ``` kubectl create clusterrolebinding my-cluster-admin-binding --clusterrole=cluster-admin --user=$(gcloud info --format="value(config.account)") kubectl create clusterrolebinding default-admin2 --clusterrole=cluster-admin --serviceaccount=kubeflow:default ``` 1.
Install Seldon Analytics Dashboard ``` helm install seldon-core-analytics --name seldon-core-analytics --set grafana_prom_admin_password=password --set persistence.enabled=false --repo https://storage.googleapis.com/seldon-charts --namespace kubeflow ``` 1. Port forward the dashboard when running ``` kubectl port-forward $(kubectl get pods -n kubeflow -l app=grafana-prom-server -o jsonpath='{.items[0].metadata.name}') -n kubeflow 3000:3000 ``` 1. Visit http://localhost:3000/dashboard/db/prediction-analytics?refresh=5s&orgId=1 and log in using "admin" and the password you set above when launching with helm. # MNIST models ## Tensorflow Model * [Python training code](models/tf_mnist/train/create_model.py) * [Python runtime prediction code](models/tf_mnist/runtime/DeepMnist.py) * [Dockerfile to wrap runtime prediction code to run under seldon-Core](models/tf_mnist/runtime/Dockerfile). ## SKLearn Model * [Python training code](models/sk_mnist/train/create_model.py) * [Python runtime prediction code](models/sk_mnist/runtime/SkMnist.py) * [Dockerfile to wrap runtime prediction code to run under seldon-Core](models/sk_mnist/runtime/Dockerfile). ## R Model * [R training code](models/r_mnist/train/train.R) * [R runtime prediction code](models/r_mnist/runtime/mnist.R) * [Dockerfile to wrap runtime prediction code to run under seldon-Core](models/r_mnist/runtime/Dockerfile). # Train the Models Follow the steps in [./notebooks/training.ipynb](./notebooks/training.ipynb) to: * Run Argo Jobs for each model to: * Create training images and push to repo * Run training * Create runtime prediction images and push to repo * Deploy individual runtime model **To push the Docker images to your own repo you will need to set up your docker credentials as a Kubernetes secret containing a [config.json](https://www.projectatomic.io/blog/2016/03/docker-credentials-store/).
To do this you can find your docker home (typically ~/.docker) and run `kubectl create secret generic docker-config --from-file=config.json=${DOCKERHOME}/config.json --type=kubernetes.io/config` to [create a secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#registry-secret-existing-credentials).** # Serve the Models Follow the steps in [./notebooks/serving.ipynb](./notebooks/serving.ipynb) to: 1. Deploy the single Tensorflow model. 2. Do a rolling update to an AB test of the Tensorflow model and the sklearn model. 3. Do a rolling update to a Multi-armed Bandit over all 3 models to direct traffic in real time to the best model. To ensure the notebook can run successfully install the python dependencies: ``` pip install -r notebooks/requirements.txt ``` If you have [installed the Seldon-Core analytics](#setup) you can view them on the grafana dashboard: ![Grafana](grafana.png "Grafana Dashboard") ================================================ FILE: VERSION ================================================ 0.1 ================================================ FILE: k8s_serving/ab_test_sklearn_tensorflow.json ================================================ { "apiVersion": "machinelearning.seldon.io/v1alpha2", "kind": "SeldonDeployment", "metadata": { "labels": { "app": "seldon" }, "name": "mnist-classifier" }, "spec": { "annotations": { "project_name": "kubeflow-seldon", "deployment_version": "v1", "seldon.io/rest-connection-timeout": "100" }, "name": "mnist-classifier", "predictors": [ { "componentSpecs": [{ "spec": { "containers": [ { "image": "seldonio/deepmnistclassifier_runtime:0.2", "name": "tf-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] }, { "image": "seldonio/skmnistclassifier_runtime:0.2", "name": "sk-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] } ], "volumes": [ { "name": "persistent-storage", "volumeSource" : { "persistentVolumeClaim": { 
"claimName": "nfs-1" } } } ] } }], "name": "mnist-classifier", "replicas": 1, "annotations": { "predictor_version": "v1" }, "graph": { "name": "random-ab-test", "implementation":"RANDOM_ABTEST", "parameters": [ { "name":"ratioA", "value":"0.5", "type":"FLOAT" } ], "children": [ { "name": "tf-model", "endpoint":{ "type":"REST" }, "type":"MODEL" }, { "name": "sk-model", "endpoint":{ "type":"REST" }, "type":"MODEL" } ] } } ] } } ================================================ FILE: k8s_serving/ambassador-auth-service-config.yaml ================================================ --- apiVersion: v1 kind: Service metadata: name: example-auth annotations: getambassador.io/config: | --- apiVersion: ambassador/v0 kind: Module name: authentication config: auth_service: "example-auth:3000" path_prefix: "/extauth" spec: type: ClusterIP selector: app: example-auth ports: - port: 3000 name: http-example-auth targetPort: http-api ================================================ FILE: k8s_serving/ambassador-auth-service-setup.yaml ================================================ --- apiVersion: v1 kind: Service metadata: name: example-auth spec: type: ClusterIP selector: app: example-auth ports: - port: 3000 name: http-example-auth targetPort: http-api --- apiVersion: extensions/v1beta1 kind: Deployment metadata: name: example-auth spec: replicas: 1 strategy: type: RollingUpdate template: metadata: labels: app: example-auth spec: containers: - name: example-auth image: seldonio/ambassador-auth-service:1.1.1 imagePullPolicy: IfNotPresent ports: - name: http-api containerPort: 3000 resources: limits: cpu: "0.1" memory: 100Mi ================================================ FILE: k8s_serving/epsilon_greedy.json ================================================ { "apiVersion": "machinelearning.seldon.io/v1alpha2", "kind": "SeldonDeployment", "metadata": { "labels": { "app": "seldon" }, "name": "mnist-classifier" }, "spec": { "annotations": { "project_name": "kubeflow-seldon", 
"deployment_version": "v1" }, "name": "mnist-classifier", "predictors": [ { "componentSpecs": [{ "spec": { "containers": [ { "image": "seldonio/deepmnistclassifier_runtime:0.2", "name": "tf-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] }, { "image": "seldonio/skmnistclassifier_runtime:0.2", "name": "sk-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] }, { "image": "seldonio/mab_epsilon_greedy:1.1", "name": "eg-router" } ], "volumes": [ { "name": "persistent-storage", "volumeSource" : { "persistentVolumeClaim": { "claimName": "nfs-1" } } } ] } }], "name": "mnist-classifier", "replicas": 1, "annotations": { "predictor_version": "v1" }, "graph": { "name": "eg-router", "type":"ROUTER", "parameters": [ { "name": "n_branches", "value": "2", "type": "INT" }, { "name": "epsilon", "value": "0.1", "type": "FLOAT" }, { "name": "verbose", "value": "1", "type": "BOOL" } ], "children": [ { "name": "sk-model", "type": "MODEL", "endpoint":{ "type":"REST" } }, { "name": "tf-model", "type": "MODEL", "endpoint":{ "type":"REST" } } ] } } ] } } ================================================ FILE: k8s_serving/epsilon_greedy_3way.json ================================================ { "apiVersion": "machinelearning.seldon.io/v1alpha2", "kind": "SeldonDeployment", "metadata": { "labels": { "app": "seldon" }, "name": "mnist-classifier" }, "spec": { "annotations": { "project_name": "kubeflow-seldon", "deployment_version": "v1" }, "name": "mnist-classifier", "predictors": [ { "componentSpecs": [{ "spec": { "containers": [ { "image": "seldonio/deepmnistclassifier_runtime:0.2", "name": "tf-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] }, { "image": "seldonio/skmnistclassifier_runtime:0.2", "name": "sk-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] }, { "image": "seldonio/rmnistclassifier_runtime:0.2", "name": "r-model", "volumeMounts": [ { 
"mountPath": "/data", "name": "persistent-storage" } ] }, { "image": "seldonio/mab_epsilon_greedy:1.1", "name": "eg-router" } ], "volumes": [ { "name": "persistent-storage", "volumeSource" : { "persistentVolumeClaim": { "claimName": "nfs-1" } } } ] } }], "name": "mnist-classifier", "replicas": 1, "annotations": { "predictor_version": "v1" }, "graph": { "name": "eg-router", "type":"ROUTER", "parameters": [ { "name": "n_branches", "value": "3", "type": "INT" }, { "name": "epsilon", "value": "0.2", "type": "FLOAT" }, { "name": "verbose", "value": "1", "type": "BOOL" } ], "children": [ { "name": "sk-model", "type": "MODEL", "endpoint":{ "type":"REST" } }, { "name": "tf-model", "type": "MODEL", "endpoint":{ "type":"REST" } }, { "name": "r-model", "type": "MODEL", "endpoint":{ "type":"REST" } } ] } } ] } } ================================================ FILE: k8s_serving/serving_model.json ================================================ { "apiVersion": "machinelearning.seldon.io/v1alpha2", "kind": "SeldonDeployment", "metadata": { "labels": { "app": "seldon" }, "name": "mnist-classifier" }, "spec": { "annotations": { "deployment_version": "v1", "project_name": "MNIST Example", "seldon.io/engine-separate-pod": "false", "seldon.io/rest-connection-timeout": "100" }, "name": "mnist-classifier", "predictors": [ { "annotations": { "predictor_version": "v1" }, "componentSpecs": [{ "spec": { "containers": [ { "image": "seldonio/deepmnistclassifier_runtime:0.2", "imagePullPolicy": "Always", "name": "tf-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] } ], "terminationGracePeriodSeconds": 1, "volumes": [ { "name": "persistent-storage", "volumeSource" : { "persistentVolumeClaim": { "claimName": "nfs-1" } } } ] } }], "graph": { "children": [], "endpoint": { "type": "REST" }, "name": "tf-model", "type": "MODEL" }, "name": "mnist-classifier", "replicas": 1 } ] } } ================================================ FILE: 
k8s_serving/serving_r_model.json ================================================ { "apiVersion": "machinelearning.seldon.io/v1alpha2", "kind": "SeldonDeployment", "metadata": { "labels": { "app": "seldon" }, "name": "mnist-classifier" }, "spec": { "annotations": { "deployment_version": "v1", "project_name": "MNIST Example" }, "name": "mnist-classifier", "predictors": [ { "annotations": { "predictor_version": "v1" }, "componentSpecs": [{ "spec": { "containers": [ { "image": "seldonio/rmnistclassifier_runtime:0.2", "imagePullPolicy": "Always", "name": "r-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] } ], "terminationGracePeriodSeconds": 1, "volumes": [ { "name": "persistent-storage", "volumeSource" : { "persistentVolumeClaim": { "claimName": "nfs-1" } } } ] } }], "graph": { "children": [], "endpoint": { "type": "REST" }, "name": "r-model", "type": "MODEL" }, "name": "mnist-classifier", "replicas": 1 } ] } } ================================================ FILE: k8s_serving/serving_sk_model.json ================================================ { "apiVersion": "machinelearning.seldon.io/v1alpha2", "kind": "SeldonDeployment", "metadata": { "labels": { "app": "seldon" }, "name": "mnist-classifier" }, "spec": { "annotations": { "deployment_version": "v1", "project_name": "MNIST Example" }, "name": "mnist-classifier", "predictors": [ { "annotations": { "predictor_version": "v1" }, "componentSpecs": [{ "spec": { "containers": [ { "image": "seldonio/skmnistclassifier_runtime:0.2", "imagePullPolicy": "Always", "name": "sk-model", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] } ], "terminationGracePeriodSeconds": 1, "volumes": [ { "name": "persistent-storage", "volumeSource" : { "persistentVolumeClaim": { "claimName": "nfs-1" } } } ] } }], "graph": { "children": [], "endpoint": { "type": "REST" }, "name": "sk-model", "type": "MODEL" }, "name": "mnist-classifier", "replicas": 1 } ] } } 
================================================ FILE: k8s_train/sklearn_training_job.yaml ================================================ apiVersion: "batch/v1" kind: "Job" metadata: name: "sk-train" namespace: "default" spec: template: metadata: name: "sk-train" spec: containers: - image: "seldonio/skmnistclassifier_trainer:0.1" name: "sk-train" volumeMounts: - mountPath: "/data" name: "persistent-storage" restartPolicy: "Never" volumes: - name: "persistent-storage" persistentVolumeClaim: claimName: "ml-data" ================================================ FILE: k8s_train/tfJob.json ================================================ { "apiVersion": "kubeflow.org/v1alpha1", "kind": "TFJob", "metadata": { "name": "mnist-train", "namespace": "kubeflow-seldon" }, "spec": { "replicaSpecs": [ { "replicas": 1, "template": { "spec": { "containers": [ { "image": "seldonio/deepmnistclassifier_trainer:0.1", "name": "tensorflow", "volumeMounts": [ { "mountPath": "/data", "name": "persistent-storage" } ] } ], "restartPolicy": "OnFailure", "volumes": [ { "name": "persistent-storage", "persistentVolumeClaim": { "claimName": "ml-data" } } ] } }, "tfReplicaType": "MASTER" } ] } } ================================================ FILE: models/r_mnist/runtime/Dockerfile ================================================ FROM rocker/r-apt:bionic RUN apt-get update && \ apt-get install -y -qq \ r-cran-plumber \ r-cran-jsonlite \ r-cran-optparse \ r-cran-stringr \ r-cran-urltools \ r-cran-caret \ r-cran-pls \ curl ENV MODEL_NAME mnist.R ENV API_TYPE REST ENV SERVICE_TYPE MODEL ENV PERSISTENCE 0 RUN mkdir microservice COPY . 
/microservice WORKDIR /microservice RUN curl -OL https://raw.githubusercontent.com/SeldonIO/seldon-core/v0.2.7/wrappers/s2i/R/microservice.R > /microservice/microservice.R EXPOSE 5000 CMD Rscript microservice.R --model $MODEL_NAME --api $API_TYPE --service $SERVICE_TYPE --persistence $PERSISTENCE ================================================ FILE: models/r_mnist/runtime/Makefile ================================================ seldon_build_image_local: docker build . -t seldonio/rmnistclassifier_runtime:0.2 seldon_push_docker_hub: docker push seldonio/rmnistclassifier_runtime:0.2 ================================================ FILE: models/r_mnist/runtime/install.R ================================================ install.packages('pls') ================================================ FILE: models/r_mnist/runtime/mnist.R ================================================ library(methods) predict.mnist <- function(mnist,newdata=list()) { cn <- 1:784 for (i in seq_along(cn)){cn[i] <- paste("X",cn[i],sep = "")} colnames(newdata) <- cn predict(mnist$model, newdata = newdata, type='prob') } send_feedback.mnist <- function(mnist,request=list(),reward=1,truth=list()) { } new_mnist <- function(filename) { model <- readRDS(filename) structure(list(model=model), class = "mnist") } initialise_seldon <- function(params) { new_mnist("/data/model.Rds") } ================================================ FILE: models/r_mnist/train/Dockerfile ================================================ FROM rocker/r-apt:bionic RUN apt-get update && \ apt-get install -y -qq \ r-cran-caret \ r-cran-pls \ r-cran-e1071 RUN R -e 'install.packages("doParallel")' RUN mkdir training COPY /train.R /training/train.R COPY /get_data.sh /training/get_data.sh COPY ./train.sh /training/train.sh RUN cd /training && \ ./get_data.sh WORKDIR /training CMD ["/training/train.sh"] ================================================ FILE: models/r_mnist/train/Makefile ================================================ 
build_model: docker build --force-rm=true -t seldonio/rmnistclassifier_trainer:0.1 . push_image: docker push seldonio/rmnistclassifier_trainer:0.1 ================================================ FILE: models/r_mnist/train/get_data.sh ================================================ wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz gunzip train-images-idx3-ubyte.gz gunzip train-labels-idx1-ubyte.gz gunzip t10k-images-idx3-ubyte.gz gunzip t10k-labels-idx1-ubyte.gz ================================================ FILE: models/r_mnist/train/install.R ================================================ install.packages('caret') install.packages('doParallel') install.packages('pls') install.packages('e1071') ================================================ FILE: models/r_mnist/train/train.R ================================================ library(caret) library(doParallel) # Enable parallel processing. cl <- makeCluster(detectCores()) registerDoParallel(cl) # Load the MNIST digit recognition dataset into R # http://yann.lecun.com/exdb/mnist/ # assume you have all 4 files and gunzip'd them # creates train$n, train$x, train$y and test$n, test$x, test$y # e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28) # call: show_digit(train$x[5,]) to see a digit. 
# brendan o'connor - gist.github.com/39760 - anyall.org

# Read the four MNIST files from the current working directory and store them
# in the global lists `train` and `test`. After the call each list carries
# `n` (record count read from the image file header), `x` (pixel matrix, one
# row per image) and `y` (label vector).
load_mnist <- function() {
  # Read one image file: a leading 4-byte word that is discarded (presumably
  # the format's magic number), then three 4-byte big-endian integers
  # (count, rows, cols), then one unsigned byte per pixel.
  load_image_file <- function(filename) {
    ret <- list()
    f <- file(filename, 'rb')
    # Release the connection even when one of the reads below fails.
    on.exit(close(f), add = TRUE)
    readBin(f, 'integer', n = 1, size = 4, endian = 'big')
    ret$n <- readBin(f, 'integer', n = 1, size = 4, endian = 'big')
    n_rows <- readBin(f, 'integer', n = 1, size = 4, endian = 'big')
    n_cols <- readBin(f, 'integer', n = 1, size = 4, endian = 'big')
    pixels <- readBin(f, 'integer', n = ret$n * n_rows * n_cols, size = 1,
                      signed = FALSE)
    # One image per row, n_rows * n_cols pixel columns.
    ret$x <- matrix(pixels, ncol = n_rows * n_cols, byrow = TRUE)
    ret
  }

  # Read one label file: a discarded leading 4-byte word, a 4-byte big-endian
  # count, then that many unsigned single-byte labels.
  load_label_file <- function(filename) {
    f <- file(filename, 'rb')
    # Release the connection even when one of the reads below fails.
    on.exit(close(f), add = TRUE)
    readBin(f, 'integer', n = 1, size = 4, endian = 'big')
    n <- readBin(f, 'integer', n = 1, size = 4, endian = 'big')
    readBin(f, 'integer', n = n, size = 1, signed = FALSE)
  }

  # `<<-` writes into the enclosing (global) environment on purpose: the rest
  # of the script reads `train` and `test` from there.
  train <<- load_image_file('train-images-idx3-ubyte')
  test <<- load_image_file('t10k-images-idx3-ubyte')
  train$y <<- load_label_file('train-labels-idx1-ubyte')
  test$y <<- load_label_file('t10k-labels-idx1-ubyte')
}

# Plot a single digit given as a 784-element vector: reshape it to a 28-row
# matrix, reverse the column order, and draw it with image(). Extra arguments
# are forwarded to image().
show_digit <- function(arr784, col=gray(12:1/12), ...) {
  image(matrix(arr784, nrow = 28)[, 28:1], col = col, ...)
}

# Placeholders in the global environment; load_mnist() overwrites both.
train <- data.frame()
test <- data.frame()

# Load data.
load_mnist()

# Normalize: X = (X - min) / (max - min) => X = (X - 0) / (255 - 0) => X = X / 255.
train$x <- train$x / 255

# Setup training data with digit and pixel values with 60/40 split for train/cv.
inTrain <- data.frame(y = train$y, train$x)
# The label becomes a factor before it is handed to caret.
inTrain$y <- as.factor(inTrain$y)
trainIndex <- createDataPartition(inTrain$y, p = 0.60, list = FALSE)
training <- inTrain[trainIndex, ]
cv <- inTrain[-trainIndex, ]

# SVM. 95/94.
#fit <- train(y ~ ., data = head(training, 1000), method = 'svmRadial', tuneGrid = data.frame(sigma=0.0107249, C=1)) fit <- train(y ~ ., data = head(training, 1000), method = 'pls') results <- predict(fit, newdata = head(cv, 1000), type='prob') #confusionMatrix(results, head(cv$y, 1000)) saveRDS(fit, file = "/data/model.Rds", compress = TRUE) ================================================ FILE: models/r_mnist/train/train.sh ================================================ #!/usr/bin/env bash # exit when any command fails set -e until mountpoint -q /data; do echo "$(date) - waiting for /data to be mounted..." sleep 1 done ls -l /data Rscript train.R ls -l /data ================================================ FILE: models/sk_mnist/runtime/Dockerfile ================================================ FROM python:3.7-slim COPY . /app WORKDIR /app RUN pip install -r requirements.txt EXPOSE 5000 # Define environment variable ENV MODEL_NAME SkMnist ENV API_TYPE REST ENV SERVICE_TYPE MODEL ENV PERSISTENCE 0 CMD exec seldon-core-microservice $MODEL_NAME $API_TYPE --service-type $SERVICE_TYPE --persistence $PERSISTENCE ================================================ FILE: models/sk_mnist/runtime/Makefile ================================================ seldon_build_image_local: docker build . 
-t seldonio/skmnistclassifier_runtime:0.2 seldon_push_docker_hub: docker push seldonio/skmnistclassifier_runtime:0.2 ================================================ FILE: models/sk_mnist/runtime/SkMnist.py ================================================ from sklearn.externals import joblib class SkMnist(object): def __init__(self): self.class_names = ["class:{}".format(str(i)) for i in range(10)] self.clf = joblib.load('/data/sk.pkl') def predict(self,X,feature_names): predictions = self.clf.predict_proba(X) return predictions ================================================ FILE: models/sk_mnist/runtime/contract.json ================================================ { "features":[ { "name":"x", "dtype":"FLOAT", "ftype":"continuous", "range":[0,1], "repeat":784 } ], "targets":[ { "name":"class", "dtype":"FLOAT", "ftype":"continuous", "range":[0,1], "repeat":10 } ] } ================================================ FILE: models/sk_mnist/runtime/requirements.txt ================================================ scipy>= 0.13.3 scikit-learn>=0.18 seldon-core>=0.2.5 ================================================ FILE: models/sk_mnist/train/Dockerfile ================================================ FROM python:3.7-slim RUN apt-get update -y RUN apt-get install -y python-pip python-dev build-essential COPY /requirements.txt /tmp/ RUN cd /tmp && \ pip install --no-cache-dir -r requirements.txt RUN mkdir training COPY ./create_model.py /training/create_model.py COPY ./train.sh /training/train.sh WORKDIR /training CMD ["/training/train.sh"] ================================================ FILE: models/sk_mnist/train/Makefile ================================================ build_model: docker build --force-rm=true -t seldonio/skmnistclassifier_trainer:0.2 . 
push_image: docker push seldonio/skmnistclassifier_trainer:0.2 ================================================ FILE: models/sk_mnist/train/create_model.py ================================================ from sklearn.ensemble import RandomForestClassifier from sklearn import datasets, metrics from sklearn.utils import shuffle from sklearn.datasets import fetch_mldata from sklearn.externals import joblib from six.moves import urllib if __name__ == '__main__': try: mnist = fetch_mldata('MNIST original') except: print("Could not download MNIST data from mldata.org, trying alternative...") # Alternative method to load MNIST, if mldata.org is down from scipy.io import loadmat mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat" mnist_path = "./mnist-original.mat" response = urllib.request.urlopen(mnist_alternative_url) with open(mnist_path, "wb") as f: content = response.read() f.write(content) mnist_raw = loadmat(mnist_path) mnist = { "data": mnist_raw["data"].T, "target": mnist_raw["label"][0], "COL_NAMES": ["label", "data"], "DESCR": "mldata.org dataset: mnist-original", } print("Success!") #mnist = fetch_mldata('MNIST original', data_home="./mnist_sklearn") # To apply a classifier on this data, we need to flatten the image, to # turn the data in a (samples, feature) matrix: n_samples = len(mnist['data']) data = mnist['data'].reshape((n_samples, -1)) targets = mnist['target'] data,targets = shuffle(data,targets) classifier = RandomForestClassifier(n_estimators=30) # We learn the digits on the first half of the digits classifier.fit(data[:n_samples // 2], targets[:n_samples // 2]) # Now predict the value of the digit on the second half: expected = targets[n_samples // 2:] test_data = data[n_samples // 2:] print(classifier.score(test_data, expected)) predicted = classifier.predict(data[n_samples // 2:]) print("Classification report for classifier %s:\n%s\n" % (classifier, metrics.classification_report(expected, 
predicted))) print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted)) joblib.dump(classifier, '/data/sk.pkl') ================================================ FILE: models/sk_mnist/train/requirements.txt ================================================ scipy scikit-learn>=0.18 six ================================================ FILE: models/sk_mnist/train/train.sh ================================================ #!/usr/bin/env bash # exit when any command fails set -e until mountpoint -q /data; do echo "$(date) - wainting for /data to be mounted..." sleep 1 done ls -l /data python -u create_model.py ls -l /data ================================================ FILE: models/tf_mnist/runtime/DeepMnist.py ================================================ import tensorflow as tf import logging logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s {%(module)s} [%(funcName)s] %(message)s', datefmt='%Y-%m-%d,%H:%M:%S', level=logging.INFO) logger = logging.getLogger(__name__) class DeepMnist(object): def __init__(self): self.class_names = ["class:{}".format(str(i)) for i in range(10)] self.sess = tf.Session() saver = tf.train.import_meta_graph("/data/deep_mnist_model.meta") saver.restore(self.sess,tf.train.latest_checkpoint("/data/")) graph = tf.get_default_graph() self.x = graph.get_tensor_by_name("x:0") self.y = graph.get_tensor_by_name("y:0") def predict(self,X,feature_names): predictions = self.sess.run(self.y,feed_dict={self.x:X}) return predictions ================================================ FILE: models/tf_mnist/runtime/Dockerfile ================================================ FROM python:3.7-slim COPY . 
/app WORKDIR /app RUN pip install -r requirements.txt EXPOSE 5000 # Define environment variable ENV MODEL_NAME DeepMnist ENV API_TYPE REST ENV SERVICE_TYPE MODEL ENV PERSISTENCE 0 CMD exec seldon-core-microservice $MODEL_NAME $API_TYPE --service-type $SERVICE_TYPE --persistence $PERSISTENCE ================================================ FILE: models/tf_mnist/runtime/Makefile ================================================ seldon_build_image_local: docker build . -t seldonio/deepmnistclassifier_runtime:0.2 seldon_push_docker_hub: docker push seldonio/deepmnistclassifier_runtime:0.2 ================================================ FILE: models/tf_mnist/runtime/contract.json ================================================ { "features":[ { "name":"x", "dtype":"FLOAT", "ftype":"continuous", "range":[0,1], "repeat":784 } ], "targets":[ { "name":"class", "dtype":"FLOAT", "ftype":"continuous", "range":[0,1], "repeat":10 } ] } ================================================ FILE: models/tf_mnist/runtime/requirements.txt ================================================ tensorflow==1.13.1 seldon-core>=0.2.5 ================================================ FILE: models/tf_mnist/train/Dockerfile ================================================ FROM tensorflow/tensorflow:1.3.0 RUN mkdir training COPY ./create_model.py /training/create_model.py WORKDIR /training CMD ["python","-u","create_model.py"] ================================================ FILE: models/tf_mnist/train/Makefile ================================================ build_model: docker build --force-rm=true -t seldonio/deepmnistclassifier_trainer:0.1 . 
push_image: docker push seldonio/deepmnistclassifier_trainer:0.1 ================================================ FILE: models/tf_mnist/train/create_model.py ================================================ from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/", one_hot = True) import tensorflow as tf if __name__ == '__main__': x = tf.placeholder(tf.float32, [None,784], name="x") W = tf.Variable(tf.zeros([784,10])) b = tf.Variable(tf.zeros([10])) y = tf.nn.softmax(tf.matmul(x,W) + b, name="y") y_ = tf.placeholder(tf.float32, [None, 10]) cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) init = tf.initialize_all_variables() sess = tf.Session() sess.run(init) for i in range(1000): batch_xs, batch_ys = mnist.train.next_batch(100) sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) print(sess.run(accuracy, feed_dict = {x: mnist.test.images, y_:mnist.test.labels})) saver = tf.train.Saver() saver.save(sess, "/data/deep_mnist_model") ================================================ FILE: nfs.md ================================================ # Example NFS Setup The steps below are a consolidated set of steps following the guide [here](https://cloud.google.com/community/tutorials/gke-filestore-dynamic-provisioning). Set the following variables * `FS` : the name of your filestore * `PROJECT` : Your Google Project * `ZONE` : Your GCP Zone Create a Google Filestore and install the helm chart for nfs-client-provisioner to use it. 
``` PROJECT=seldon-demos FS=mnist-data ZONE=europe-west1-b gcloud beta filestore instances create ${FS} --project=${PROJECT} --location=${ZONE} --tier=STANDARD --file-share=name="volumes",capacity=1TB --network=name="default",reserved-ip-range="10.0.0.0/29" FSADDR=$(gcloud beta filestore instances describe ${FS} --project=${PROJECT} --location=${ZONE} --format="value(networks.ipAddresses[0])") helm install stable/nfs-client-provisioner --name nfs-cp --set nfs.server=${FSADDR} --set nfs.path=/volumes kubectl rollout status deploy/nfs-cp-nfs-client-provisioner -n kubeflow ``` To create the NFS claim save the following and apply to your kubernetes cluster ``` apiVersion: v1 kind: PersistentVolumeClaim metadata: name: nfs-1 spec: accessModes: - ReadWriteMany storageClassName: nfs-client resources: requests: storage: 30Gi ``` ================================================ FILE: notebooks/Makefile ================================================ SHELL=/bin/bash tensorflow/core/framework/tensor.proto: ./create-protos.sh .PHONY: create_protos create_protos: tensorflow/core/framework/tensor.proto .PHONY: clean clean: @rm -rfv tensorflow ================================================ FILE: notebooks/__init__.py ================================================ ================================================ FILE: notebooks/create-protos.sh ================================================ #!/bin/bash release=${1:-"master"} echo Downloading proto files for ${release} base=https://raw.githubusercontent.com/tensorflow tensorflow_base=${base}/tensorflow/${release} base_folder=tensorflow/core/framework/ mkdir -p ${base_folder} curl -s ${tensorflow_base}/tensorflow/core/framework/types.proto > ${base_folder}/types.proto curl -s ${tensorflow_base}/tensorflow/core/framework/resource_handle.proto > ${base_folder}/resource_handle.proto curl -s ${tensorflow_base}/tensorflow/core/framework/tensor_shape.proto > ${base_folder}/tensor_shape.proto curl -s 
${tensorflow_base}/tensorflow/core/framework/tensor.proto > ${base_folder}/tensor.proto ================================================ FILE: notebooks/proto/__init__.py ================================================ ================================================ FILE: notebooks/proto/prediction.proto ================================================ syntax = "proto3"; import "google/protobuf/struct.proto"; import "tensorflow/core/framework/tensor.proto"; package seldon.protos; option java_package = "io.seldon.protos"; option java_outer_classname = "PredictionProtos"; option go_package = "github.com/seldonio/seldon-core/examples/wrappers/go/pkg/api"; // [START Messages] message SeldonMessage { Status status = 1; Meta meta = 2; oneof data_oneof { DefaultData data = 3; bytes binData = 4; string strData = 5; } } message DefaultData { repeated string names = 1; oneof data_oneof { Tensor tensor = 2; google.protobuf.ListValue ndarray = 3; tensorflow.TensorProto tftensor = 4; } } message Tensor { repeated int32 shape = 1 [packed=true]; repeated double values = 2 [packed=true]; } message Meta { string puid = 1; map tags = 2; map routing = 3; map requestPath = 4; repeated Metric metrics = 5; } message Metric { enum MetricType { COUNTER = 0; GAUGE = 1; TIMER = 2; } string key = 1; MetricType type = 2; float value = 3; map tags = 4; } message SeldonMessageList { repeated SeldonMessage seldonMessages = 1; } message Status { enum StatusFlag { SUCCESS = 0; FAILURE = 1; } int32 code = 1; string info = 2; string reason = 3; StatusFlag status = 4; } message Feedback { SeldonMessage request = 1; SeldonMessage response = 2; float reward = 3; SeldonMessage truth = 4; } message RequestResponse { SeldonMessage request = 1; SeldonMessage response = 2; } // [END Messages] // [START Services] service Generic { rpc TransformInput(SeldonMessage) returns (SeldonMessage) {}; rpc TransformOutput(SeldonMessage) returns (SeldonMessage) {}; rpc Route(SeldonMessage) returns (SeldonMessage) {}; 
rpc Aggregate(SeldonMessageList) returns (SeldonMessage) {}; rpc SendFeedback(Feedback) returns (SeldonMessage) {}; } service Model { rpc Predict(SeldonMessage) returns (SeldonMessage) {}; rpc SendFeedback(Feedback) returns (SeldonMessage) {}; } service Router { rpc Route(SeldonMessage) returns (SeldonMessage) {}; rpc SendFeedback(Feedback) returns (SeldonMessage) {}; } service Transformer { rpc TransformInput(SeldonMessage) returns (SeldonMessage) {}; } service OutputTransformer { rpc TransformOutput(SeldonMessage) returns (SeldonMessage) {}; } service Combiner { rpc Aggregate(SeldonMessageList) returns (SeldonMessage) {}; } service Seldon { rpc Predict(SeldonMessage) returns (SeldonMessage) {}; rpc SendFeedback(Feedback) returns (SeldonMessage) {}; } // [END Services] ================================================ FILE: notebooks/requirements.txt ================================================ matplotlib==3.0.3 grpcio==1.20.1 grpcio-tools==1.20.1 graphviz==0.10.1 ================================================ FILE: notebooks/serving.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Deploying Various MNIST Models on Kubernetes \n", "\n", "Using:\n", "\n", " * kubeflow\n", " * seldon-core\n", " \n", " \n", "Follow the main README to setup kubeflow and seldon-core. This notebook will show various rolling deployments of the trained models\n", "\n", " * Single model\n", " * AB Test between 2 models\n", " * Multi-Armed Bandit over 3 models\n", " \n", "### Dependencies\n", " \n", " * Tensorflow\n", " * grpcio package\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup\n", "\n", "Set kubectl to use the namespace where you installed kubeflow and seldon. In the README it is kubeflow." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl config set-context $(kubectl config current-context) --namespace=kubeflow" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!make create_protos" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!python -m grpc.tools.protoc -I. --python_out=. --grpc_python_out=. ./proto/prediction.proto" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import utils\n", "from visualizer import get_graph\n", "mnist = utils.download_mnist()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "**Ensure you have port forwarded the ambassador reverse proxy**\n", "\n", "```bash\n", "kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8002:80\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Deploy Single Tensorflow Model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "get_graph(\"../k8s_serving/serving_model.json\",'r')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../k8s_serving/serving_model.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl apply -f ../k8s_serving/serving_model.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "utils.predict_rest_mnist(mnist)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "utils.predict_grpc_mnist(mnist)" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "# Start load test" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl label nodes $(kubectl get nodes -o jsonpath='{.items[0].metadata.name}') role=locust" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!helm install seldon-core-loadtesting --name loadtest \\\n", " --namespace kubeflow \\\n", " --repo https://storage.googleapis.com/seldon-charts \\\n", " --set locust.script=mnist_rest_locust.py \\\n", " --set locust.host=http://mnist-classifier:8000 \\\n", " --set oauth.enabled=false \\\n", " --set oauth.key=oauth-key \\\n", " --set oauth.secret=oauth-secret \\\n", " --set locust.hatchRate=1 \\\n", " --set locust.clients=1 \\\n", " --set loadtest.sendFeedback=1 \\\n", " --set locust.minWait=0 \\\n", " --set locust.maxWait=0 \\\n", " --set replicaCount=1 \\\n", " --set data.size=784\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Rolling update to AB Test\n", " Run an AB Test between 2 models:\n", " * Tensorflow neural network model\n", " * Scikit-learn random forest.\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "get_graph(\"../k8s_serving/ab_test_sklearn_tensorflow.json\",'r')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../k8s_serving/ab_test_sklearn_tensorflow.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl apply -f ../k8s_serving/ab_test_sklearn_tensorflow.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "utils.predict_rest_mnist(mnist)" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [ "utils.evaluate_abtest(mnist,100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Rolling Update to Multi-Armed Bandit\n", "Run a epsilon-greey multi-armed bandit over 3 models:\n", " * Tensorflow neural network model\n", " * Scikit-learn random forest model\n", " * R least-squares model\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "get_graph(\"../k8s_serving/epsilon_greedy_3way.json\",'r')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../k8s_serving/epsilon_greedy_3way.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl apply -f ../k8s_serving/epsilon_greedy_3way.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "utils.predict_rest_mnist(mnist)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "utils.evaluate_egreedy(mnist,100)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 1 } ================================================ FILE: notebooks/training.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Train Various Models on MNIST using kubeflow and seldon-core\n", "\n", 
"Using:\n", "\n", " * kubeflow\n", " * seldon-core\n", " \n", "The example will be the MNIST handwriiten digit classification task.\n", "\n", "![MNIST](mnist.png \"MNIST Digits\")\n", "\n", "### Dependencies\n", "\n", " * Argo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!kubectl config set-context $(kubectl config current-context) --namespace=kubeflow" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Tensorflow Model\n", " A simple neural network in Tensorflow." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Training\n", " * Create image from source\n", " * Run training\n", " \n", "\n", "Run with:\n", " * ``` -p build-push-image=true``` to build image and push to repo, needed extra params:\n", " * ``` -p version=``` create `````` of model\n", " * ``` -p github-user=``` to download example-seldon source from `````` account\n", " * ``` -p github-revision=``` to use the github branch ``````\n", " * ``` -p docker-org=``` to use Docker repo `````` to push image to. Needs docker credentials in secret as described in README." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../workflows/training-tf-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo submit ../workflows/training-tf-mnist-workflow.yaml -p tfjob-version-hack=1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Runtime Image\n", "\n", "Run with:\n", " * ``` -p build-push-image=true``` to build image and push to repo, needed extra params:\n", " * ``` -p version=<version>``` create ```<version>``` of model\n", " * ``` -p github-user=<github-user>``` to download example-seldon source from ```<github-user>``` account\n", " * ``` -p github-revision=<revision>``` to use the github branch ```<revision>```\n", " * ``` -p docker-org=<docker-org>``` to use Docker user ```<docker-org>``` to push image to. Needs docker credentials in secret as described in README.\n", " * ``` -p deploy-model=true``` to deploy model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../workflows/serving-tf-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo submit ../workflows/serving-tf-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sklearn Model\n", "A Random forest in sklearn."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Training\n", "\n", " * For options see above Tensorflow example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../workflows/training-sk-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo submit ../workflows/training-sk-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Runtime Image\n", " * For options see above Tensorflow example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../workflows/serving-sk-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo submit ../workflows/serving-sk-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# R Model\n", "A partial least squares model in R." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Training\n", "\n", " * For options see above Tensorflow example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../workflows/training-r-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo submit ../workflows/training-r-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Runtime Image\n", " * For options see above Tensorflow example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pygmentize ../workflows/serving-r-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo submit ../workflows/serving-r-mnist-workflow.yaml" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!argo list" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 1 } ================================================ FILE: notebooks/utils.py ================================================ import requests from requests.auth import HTTPBasicAuth from random import randint,random from proto import prediction_pb2 from proto import prediction_pb2_grpc import grpc import json from visualizer import get_graph from matplotlib import pyplot as plt import numpy as np from 
tensorflow.examples.tutorials.mnist import input_data from google.protobuf.json_format import MessageToJson AMBASSADOR_API_IP="localhost:8002" def rest_request(deploymentName,request): response = requests.post( "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/predictions", json=request) j = response.json() return j def rest_request_auth(deploymentName,data,username,password): payload = {"data":{"ndarray":data.tolist()}} response = requests.post( "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/predictions", json=payload, auth=HTTPBasicAuth(username, password)) print(response.status_code) return response.json() def grpc_request(deploymentName,data): datadef = prediction_pb2.DefaultData( names = ["a","b"], tensor = prediction_pb2.Tensor( shape = [1,784], values = data ) ) request = prediction_pb2.SeldonMessage(data = datadef) channel = grpc.insecure_channel(AMBASSADOR_API_IP) stub = prediction_pb2_grpc.SeldonStub(channel) metadata = [('seldon',deploymentName)] response = stub.Predict(request=request,metadata=metadata) return response def send_feedback_rest(deploymentName,request,response,reward): feedback = { "request": request, "response": response, "reward": reward } ret = requests.post( "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/feedback", json=feedback) return ret.text def gen_image(arr): two_d = (np.reshape(arr, (28, 28)) * 255).astype(np.uint8) plt.imshow(two_d,cmap=plt.cm.gray_r, interpolation='nearest') return plt def download_mnist(): return input_data.read_data_sets("MNIST_data/", one_hot = True) def predict_rest_mnist(mnist): batch_xs, batch_ys = mnist.train.next_batch(1) chosen=0 gen_image(batch_xs[chosen]).show() data = batch_xs[chosen].reshape((1,784)) features = ["X"+str(i+1) for i in range (0,784)] request = {"data":{"names":features,"ndarray":data.tolist()}} predictions = rest_request("mnist-classifier",request) print(json.dumps(predictions,indent=2)) 
#print("Route:"+json.dumps(predictions["meta"]["routing"],indent=2)) fpreds = [ '%.2f' % elem for elem in predictions["data"]["ndarray"][0] ] m = dict(zip(predictions["data"]["names"],fpreds)) print("Returned probabilities") print(json.dumps(m,indent=2)) def predict_grpc_mnist(mnist): batch_xs, batch_ys = mnist.train.next_batch(1) chosen=0 gen_image(batch_xs[chosen]).show() data = batch_xs[chosen].reshape((784)) resp = grpc_request("mnist-classifier",data) predictions = MessageToJson(resp) predictions = json.loads(predictions) print(json.dumps(predictions,indent=2)) fpreds = [ '%.2f' % elem for elem in predictions["data"]["tensor"]["values"] ] m = dict(zip(predictions["data"]["names"],fpreds)) print("Returned probabilities") print(json.dumps(m,indent=2)) def evaluate_abtest(mnist,sz=100): batch_xs, batch_ys = mnist.train.next_batch(sz) routes_history = [] for idx in range(sz): if idx % 10 == 0: print("{}/{}".format(idx,sz)) data = batch_xs[idx].reshape((1,784)) request = {"data":{"ndarray":data.tolist()}} response = rest_request("mnist-classifier",request) route = response.get("meta").get("routing").get("random-ab-test") routes_history.append(route) plt.figure(figsize=(15,6)) ax = plt.scatter(range(len(routes_history)),routes_history) ax.axes.xaxis.set_label_text("Incoming Requests over Time") ax.axes.yaxis.set_label_text("Selected Branch") plt.yticks([0,1,2]) _ = plt.title("Branch Chosen for Incoming Requests") def evaluate_egreedy(mnist,sz=100): score = [0.0,0.0,0.0] sz = 100 batch_xs, batch_ys = mnist.train.next_batch(sz) routes_history = [] for idx in range(sz): if idx % 10 == 0: print("{}/{}".format(idx,sz)) data = batch_xs[idx].reshape((1,784)) request = {"data":{"ndarray":data.tolist()}} response = rest_request("mnist-classifier",request) route = response.get("meta").get("routing").get("eg-router") proba = response["data"]["ndarray"][0] predicted = proba.index(max(proba)) correct = np.argmax(batch_ys[idx]) if predicted == correct: score[route] = score[route] 
+ 1 send_feedback_rest("mnist-classifier",request,response,reward=1) else: send_feedback_rest("mnist-classifier",request,response,reward=0) routes_history.append(route) plt.figure(figsize=(15,6)) ax = plt.scatter(range(len(routes_history)),routes_history) ax.axes.xaxis.set_label_text("Incoming Requests over Time") ax.axes.yaxis.set_label_text("Selected Branch") plt.yticks([0,1,2]) _ = plt.title("Branch Chosen for Incoming Requests") print(score) ================================================ FILE: notebooks/visualizer.py ================================================ import graphviz import json def _populate_graph(dot, root, suffix=''): name = root.get("name") id = name+suffix if root.get("implementation"): dot.node(id, label=name, shape="box", style="filled", color="lightgrey") else: dot.node(id, label=name, shape="box") endpoint_type = root.get("endpoint",{}).get("type") if endpoint_type is not None: dot.node(id+'endpoint', label=endpoint_type) dot.edge(id,id+'endpoint') for child in root.get("children",[]): child_id = _populate_graph(dot,child) dot.edge(id, child_id) return id def get_graph(filename,predictor=0): deployment = json.load(open(filename,'r')) predictors = deployment.get("spec").get("predictors") dot = graphviz.Digraph() with dot.subgraph(name="cluster_0") as pdot: graph = predictors[0].get("graph") _populate_graph(pdot, graph, suffix='0') pdot.attr(label="predictor") if len(predictors)>1: with dot.subgraph(name="cluster_1") as cdot: graph = predictors[1].get("graph") _populate_graph(cdot, graph, suffix='1') cdot.attr(label="canary") return dot ================================================ FILE: scripts/README.md ================================================ # Create MNIST Demo 1. You will need all prerequisites (gcloud, kubectl, ks) in your path. 1. Copy `env-example.sh` to `env.sh` and edit with your own settings 1. run `create_demo.sh` # Delete Demo 1. run `delete-demo.sh` - this will delete the GCP resources except the Filestore disk. 
You will need to delete this manually at present.

================================================
FILE: scripts/create_demo.sh
================================================
#!/usr/bin/env bash
# Creates the MNIST demo: downloads Kubeflow, deploys it on GCP, then adds
# Seldon, Helm/Tiller, an NFS-backed PVC, cluster role bindings and analytics.
# All settings are read from env.sh in the current directory.

set -o nounset
set -o errexit
set -o pipefail

# Download the Kubeflow scripts for ${KUBEFLOW_TAG} into ${KUBEFLOW_SRC}.
create_src() {
    mkdir -p "${KUBEFLOW_SRC}"
    cd "${KUBEFLOW_SRC}"
    curl "https://raw.githubusercontent.com/kubeflow/kubeflow/${KUBEFLOW_TAG}/scripts/download.sh" | bash
}

# Initialise the kfctl app and generate/apply the platform and k8s resources.
launch_kubeflow() {
    KUBEFLOW_REPO="${KUBEFLOW_SRC}" "${KUBEFLOW_SRC}/scripts/kfctl.sh" init "${KFAPP}" --platform gcp --project "${PROJECT}"
    cd "${KFAPP}"
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" generate platform
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" apply platform
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" generate k8s
    "${KUBEFLOW_SRC}/scripts/kfctl.sh" apply k8s
}

# Install and apply the seldon ksonnet package in the generated ks_app.
launch_seldon() {
    cd "${KUBEFLOW_SRC}/${KFAPP}/ks_app"
    ks pkg install kubeflow/seldon
    ks generate seldon seldon
    ks apply default -c seldon
}

# Create the tiller service account, bind it to cluster-admin and init Helm.
add_helm() {
    kubectl -n kube-system create sa tiller
    kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller
    helm init --service-account tiller
    kubectl rollout status deploy/tiller-deploy -n kube-system
}

# Create the Filestore instance if needed, install the NFS client provisioner
# pointing at it and create the PVC from nfs-pvc.yaml.
add_nfs_disk() {
    # Errors are tolerated here: an empty FSADDR from "describe" is what
    # triggers creation of the instance.
    set +e
    FSADDR=$(gcloud beta filestore instances describe "${FS}" --project="${PROJECT}" --location="${ZONE}" --format="value(networks.ipAddresses[0])")
    if [ -z "$FSADDR" ]; then
        echo "Creating filestore NFS volume"
        gcloud beta filestore instances create "${FS}" --project="${PROJECT}" --location="${ZONE}" --tier=STANDARD --file-share=name="volumes",capacity=1TB --network=name="default",reserved-ip-range="10.0.0.0/29"
    fi
    set -e

    FSADDR=$(gcloud beta filestore instances describe "${FS}" --project="${PROJECT}" --location="${ZONE}" --format="value(networks.ipAddresses[0])")
    helm install stable/nfs-client-provisioner --name nfs-cp --set nfs.server="${FSADDR}" --set nfs.path=/volumes
    kubectl rollout status deploy/nfs-cp-nfs-client-provisioner -n kubeflow
    kubectl apply -f "${STARTUP_DIR}/nfs-pvc.yaml" -n kubeflow
}

# Give cluster-admin to the current gcloud account and to kubeflow:default.
add_argo_clusterrole() {
    kubectl create clusterrolebinding my-cluster-admin-binding --clusterrole=cluster-admin --user="$(gcloud info --format="value(config.account)")"
    kubectl create clusterrolebinding default-admin2 --clusterrole=cluster-admin --serviceaccount=kubeflow:default
}

# Install the seldon-core-analytics chart into the kubeflow namespace.
add_seldon_analytics() {
    helm install seldon-core-analytics --name seldon-core-analytics --set grafana_prom_admin_password=password --set persistence.enabled=false --repo https://storage.googleapis.com/seldon-charts --namespace kubeflow
}

# Stop with a clear message rather than failing on the "source" below.
if [ ! -f env.sh ]; then
    echo "Create env.sh by copying env-example.sh" >&2
    exit 1
fi
source env.sh

create_src
launch_kubeflow
launch_seldon
add_helm
add_nfs_disk
add_argo_clusterrole
add_seldon_analytics

================================================
FILE: scripts/delete-demo.sh
================================================
#!/usr/bin/env bash
# Deletes everything kfctl created for the demo app configured in env.sh.

set -o nounset
set -o errexit
set -o pipefail

# Stop with a clear message rather than failing on the "source" below.
if [ ! -f env.sh ]; then
    echo "Create env.sh by copying env-example.sh" >&2
    exit 1
fi
source env.sh

cd "${KUBEFLOW_SRC}/${KFAPP}"
"${KUBEFLOW_SRC}/scripts/kfctl.sh" delete all

================================================
FILE: scripts/env-example.sh
================================================
# Settings sourced by create_demo.sh and delete-demo.sh (copy to env.sh).
STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )"
KFAPP=my-kubeflow
PROJECT=seldon-demos
KUBEFLOW_SRC=${STARTUP_DIR}/kubeflow_src
FS=mnist-data
ZONE=europe-west1-b

# Next two lines are set from values created as discussed in https://www.kubeflow.org/docs/started/getting-started-gke/
export CLIENT_ID=
export CLIENT_SECRET=

export KUBEFLOW_TAG=v0.5.1

================================================
FILE: scripts/nfs-pvc.yaml
================================================
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-1
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: nfs-client
  resources:
    requests:
      storage: 30Gi

================================================
FILE: scripts/port-forwards.sh
================================================
#Argo
kubectl port-forward $(kubectl get pods -n kubeflow -l app=argo-ui -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8001:8001 &

#Seldon Grafana
kubectl port-forward $(kubectl get pods -n kubeflow -l app=grafana-prom-server -o jsonpath='{.items[0].metadata.name}') -n kubeflow 3000:3000 &

#Ambassador reverse proxy
kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8002:80 &

#Ambassador admin
kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8877:8877 &

================================================
FILE: scripts/watch-mnist.sh
================================================
watch kubectl get pods -l seldon-app=mnist-classifier

================================================
FILE: workflows/serving-r-mnist-workflow.yaml
================================================
# This workflow uses a git repo as a hard-wired input artifact.
# The example-seldon repo is cloned to '/src/example-seldon' and the
# R runtime model files are copied from there to the shared '/workspace' volume.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: seldon-r-deploy-
spec:
  entrypoint: workflow
  arguments:
    # Values are quoted so YAML does not turn them into floats/booleans;
    # they are substituted as text wherever the parameter is referenced.
    parameters:
      - name: version
        value: "0.1"
      - name: github-user
        value: kubeflow
      - name: github-revision
        value: master
      - name: docker-org
        value: index.docker.io/seldonio
      - name: build-push-image
        value: "false"
      - name: deploy-model
        value: "false"
  volumes:
    - name: docker-config
      secret:
        secretName: docker-config  # name of an existing k8s secret
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 0.5Gi
  templates:
    # Entry point: fetch the source, optionally build/push the image,
    # optionally deploy the model.
    - name: workflow
      steps:
        - - name: get-source
            template: get-source-code
        - - name: build-push
            template: build-and-push
            when: "{{workflow.parameters.build-push-image}} == true"
        - - name: serve
            template: seldon
            when: "{{workflow.parameters.deploy-model}} == true"
    # Clone the repo and copy the R runtime files to the workspace volume.
    - name: get-source-code
      inputs:
        artifacts:
          - name: argo-source
            path: /src/example-seldon
            git:
              repo: "https://github.com/{{workflow.parameters.github-user}}/example-seldon.git"
              revision: "{{workflow.parameters.github-revision}}"
      container:
        image: alpine:latest
        command: [sh, -c]
        args: ["cp /src/example-seldon/models/r_mnist/runtime/* /workspace/; ls /workspace/"]
        volumeMounts:
          - name: workspace
            mountPath: /workspace
    # Build the runtime image with kaniko and push it to docker-org.
    - name: build-and-push
      container:
        image: gcr.io/kaniko-project/executor:latest
        args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/rmnistclassifier_runtime:{{workflow.parameters.version}}"]
        # NOTE(review): only /workspace is mounted in this step; confirm that
        # this /src working directory is intended.
        workingDir: /src/example-seldon/models/r_mnist/runtime/
        volumeMounts:
          - name: docker-config
            mountPath: "/root/.docker/"
          - name: workspace
            mountPath: /workspace
    # Apply the SeldonDeployment that serves the model.
    - name: seldon
      resource:  # indicates that this is a resource template
        action: apply  # can be any kubectl action (e.g. create, delete, apply, patch)
        # successCondition: ?
        manifest: |  # put your kubernetes spec here
          apiVersion: "machinelearning.seldon.io/v1alpha2"
          kind: "SeldonDeployment"
          metadata:
            labels:
              app: "seldon"
            name: "mnist-classifier"
          spec:
            annotations:
              deployment_version: "v1"
              project_name: "MNIST Example"
            name: "mnist-classifier"
            predictors:
              - annotations:
                  predictor_version: "v1"
                componentSpecs:
                  - spec:
                      containers:
                        - image: "{{workflow.parameters.docker-org}}/rmnistclassifier_runtime:{{workflow.parameters.version}}"
                          imagePullPolicy: "Always"
                          name: "mnist-classifier"
                          volumeMounts:
                            - mountPath: "/data"
                              name: "persistent-storage"
                      terminationGracePeriodSeconds: 1
                      volumes:
                        - name: "persistent-storage"
                          volumeSource:
                            persistentVolumeClaim:
                              claimName: "nfs-1"
                graph:
                  children: []
                  endpoint:
                    type: "REST"
                  name: "mnist-classifier"
                  type: "MODEL"
                name: "mnist-classifier"
                replicas: 1

================================================
FILE: workflows/serving-sk-mnist-workflow.yaml
================================================
# This workflow uses a git repo as a hard-wired input artifact.
# The example-seldon repo is cloned to '/src/example-seldon' and the
# sklearn runtime model files are copied from there to the shared '/workspace' volume.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: seldon-sk-deploy-
spec:
  entrypoint: workflow
  arguments:
    # Values are quoted so YAML does not turn them into floats/booleans;
    # they are substituted as text wherever the parameter is referenced.
    parameters:
      - name: version
        value: "0.1"
      - name: github-user
        value: kubeflow
      - name: github-revision
        value: master
      - name: docker-org
        value: index.docker.io/seldonio
      - name: build-push-image
        value: "false"
      - name: deploy-model
        value: "false"
  volumes:
    - name: docker-config
      secret:
        secretName: docker-config  # name of an existing k8s secret
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 0.5Gi
  templates:
    # Entry point: fetch the source, optionally build/push the image,
    # optionally deploy the model.
    - name: workflow
      steps:
        - - name: get-source
            template: get-source-code
        - - name: build-push
            template: build-and-push
            when: "{{workflow.parameters.build-push-image}} == true"
        - - name: serve
            template: seldon
            when: "{{workflow.parameters.deploy-model}} == true"
    # Clone the repo and copy the sklearn runtime files to the workspace volume.
    - name: get-source-code
      inputs:
        artifacts:
          - name: argo-source
            path: /src/example-seldon
            git:
              repo: "https://github.com/{{workflow.parameters.github-user}}/example-seldon.git"
              revision: "{{workflow.parameters.github-revision}}"
      container:
        image: alpine:latest
        command: [sh, -c]
        args: ["cp /src/example-seldon/models/sk_mnist/runtime/* /workspace/; ls /workspace/"]
        volumeMounts:
          - name: workspace
            mountPath: /workspace
    # Build the runtime image with kaniko and push it to docker-org.
    - name: build-and-push
      container:
        image: gcr.io/kaniko-project/executor:latest
        args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/skmnistclassifier_runtime:{{workflow.parameters.version}}"]
        # NOTE(review): only /workspace is mounted in this step; confirm that
        # this /src working directory is intended.
        workingDir: /src/example-seldon/models/sk_mnist/runtime/
        volumeMounts:
          - name: docker-config
            mountPath: "/root/.docker/"
          - name: workspace
            mountPath: /workspace
    # Apply the SeldonDeployment that serves the model.
    - name: seldon
      resource:  # indicates that this is a resource template
        action: apply  # can be any kubectl action (e.g. create, delete, apply, patch)
        # successCondition: ?
        manifest: |  # put your kubernetes spec here
          apiVersion: "machinelearning.seldon.io/v1alpha2"
          kind: "SeldonDeployment"
          metadata:
            labels:
              app: "seldon"
            name: "mnist-classifier"
          spec:
            annotations:
              deployment_version: "v1"
              project_name: "MNIST Example"
            name: "mnist-classifier"
            predictors:
              - annotations:
                  predictor_version: "v1"
                componentSpecs:
                  - spec:
                      containers:
                        - image: "{{workflow.parameters.docker-org}}/skmnistclassifier_runtime:{{workflow.parameters.version}}"
                          imagePullPolicy: "Always"
                          name: "mnist-classifier"
                          volumeMounts:
                            - mountPath: "/data"
                              name: "persistent-storage"
                      terminationGracePeriodSeconds: 1
                      volumes:
                        - name: "persistent-storage"
                          volumeSource:
                            persistentVolumeClaim:
                              claimName: "nfs-1"
                graph:
                  children: []
                  endpoint:
                    type: "REST"
                  name: "mnist-classifier"
                  type: "MODEL"
                name: "mnist-classifier"
                replicas: 1

================================================
FILE: workflows/serving-tf-mnist-workflow.md
================================================
# Example Argo Workflow to dockerize runtime model and deploy it for serving

Comments on the [serving-tf-mnist-workflow.yaml](serving-tf-mnist-workflow.yaml)

## Workflow Summary

To serve our runtime model we create:

* [```models/tf_mnist/runtime/Dockerfile```](../models/tf_mnist/runtime/Dockerfile) to wrap the model using the seldon-core python wrapper.
* An Argo workflow that:
    * Wraps the runtime model, builds a docker container for it and optionally pushes it to your repo
    * Optionally starts a seldon deployment that will run and expose your model

## Workflow parameters

* version
    * The version tag for the Docker image
* github-user
    * The github user to use to clone this repo/fork
* github-revision
    * The github revision to use for cloning the repo (can be a branch name)
* docker-org
    * The Docker host and org/user/project to use when pushing an image to the registry
* build-push-image
    * Whether to build and push the image to docker registry (true/false)
* deploy-model
    * Whether to start a seldon deployment to run and expose your model (true/false)

================================================
FILE: workflows/serving-tf-mnist-workflow.yaml
================================================
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: seldon-tf-deploy-
spec:
  entrypoint: workflow
  arguments:
    # Values are quoted so YAML does not turn them into floats/booleans;
    # they are substituted as text wherever the parameter is referenced.
    parameters:
      - name: version
        value: "0.1"
      - name: github-user
        value: kubeflow
      - name: github-revision
        value: master
      - name: docker-org
        value: index.docker.io/seldonio
      - name: build-push-image
        value: "false"
      - name: deploy-model
        value: "false"
  volumes:
    - name: docker-config
      secret:
        secretName: docker-config  # name of an existing k8s secret
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 0.5Gi
  templates:
    # Entry point: fetch the source, optionally build/push the image,
    # optionally deploy the model.
    - name: workflow
      steps:
        - - name: get-source
            template: get-source-code
        - - name: build-push
            template: build-and-push
            when: "{{workflow.parameters.build-push-image}} == true"
        - - name: serve
            template: seldon
            when: "{{workflow.parameters.deploy-model}} == true"
    # Clone the repo and copy the Tensorflow runtime files to the workspace volume.
    - name: get-source-code
      inputs:
        artifacts:
          - name: argo-source
            path: /src/example-seldon
            git:
              repo: "https://github.com/{{workflow.parameters.github-user}}/example-seldon.git"
              revision: "{{workflow.parameters.github-revision}}"
      container:
        image: alpine:latest
        command: [sh, -c]
        args: ["cp /src/example-seldon/models/tf_mnist/runtime/* /workspace/; ls /workspace/"]
        volumeMounts:
          - name: workspace
            mountPath: /workspace
    # Build the runtime image with kaniko and push it to docker-org.
    - name: build-and-push
      container:
        image: gcr.io/kaniko-project/executor:latest
        args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/deepmnistclassifier_runtime:{{workflow.parameters.version}}"]
        # NOTE(review): only /workspace is mounted in this step; confirm that
        # this /src working directory is intended.
        workingDir: /src/example-seldon/models/tf_mnist/runtime/
        volumeMounts:
          - name: docker-config
            mountPath: "/root/.docker/"
          - name: workspace
            mountPath: /workspace
    # Apply the SeldonDeployment that serves the model.
    - name: seldon
      resource:  # indicates that this is a resource template
        action: apply  # can be any kubectl action (e.g. create, delete, apply, patch)
        # successCondition: ?
        manifest: |  # put your kubernetes spec here
          apiVersion: "machinelearning.seldon.io/v1alpha2"
          kind: "SeldonDeployment"
          metadata:
            labels:
              app: "seldon"
            name: "mnist-classifier"
          spec:
            annotations:
              deployment_version: "v1"
              project_name: "MNIST Example"
            name: "mnist-classifier"
            predictors:
              - annotations:
                  predictor_version: "v1"
                componentSpecs:
                  - spec:
                      containers:
                        - image: "{{workflow.parameters.docker-org}}/deepmnistclassifier_runtime:{{workflow.parameters.version}}"
                          imagePullPolicy: "Always"
                          name: "mnist-classifier"
                          volumeMounts:
                            - mountPath: "/data"
                              name: "persistent-storage"
                      terminationGracePeriodSeconds: 1
                      volumes:
                        - name: "persistent-storage"
                          volumeSource:
                            persistentVolumeClaim:
                              claimName: "nfs-1"
                graph:
                  children: []
                  endpoint:
                    type: "REST"
                  name: "mnist-classifier"
                  type: "MODEL"
                name: "mnist-classifier"
                replicas: 1

================================================
FILE: workflows/training-r-mnist-workflow.yaml
================================================
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-r-train-
spec:
  entrypoint: workflow
  arguments:
    # Values are quoted so YAML does not turn them into floats/booleans;
    # they are substituted as text wherever the parameter is referenced.
    parameters:
      - name: version
        value: "0.1"
      - name: github-user
        value: kubeflow
      - name: github-revision
        value: master
      # Registry host included, matching the other workflows in this directory.
      - name: docker-org
        value: index.docker.io/seldonio
      - name: build-push-image
        value: "false"
  volumes:
    - name: docker-config
      secret:
        secretName: docker-config  # name of an existing k8s secret
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 0.5Gi
  templates:
    # Entry point: fetch the source, optionally build/push the image, then train.
    - name: workflow
      steps:
        - - name: get-source
            template: get-source-code
        - - name: build-push
            template: build-and-push
            when: "{{workflow.parameters.build-push-image}} == true"
        - - name: train
            template: tfjob
    # Clone the repo and copy the R training files to the workspace volume.
    - name: get-source-code
      inputs:
        artifacts:
          - name: argo-source
            path: /src/example-seldon
            git:
              repo: "https://github.com/{{workflow.parameters.github-user}}/example-seldon.git"
              revision: "{{workflow.parameters.github-revision}}"
      container:
        image: alpine:latest
        command: [sh, -c]
        args: ["cp /src/example-seldon/models/r_mnist/train/* /workspace/; ls /workspace/"]
        volumeMounts:
          - name: workspace
            mountPath: /workspace
    # Build the trainer image with kaniko and push it to docker-org.
    - name: build-and-push
      container:
        image: gcr.io/kaniko-project/executor:latest
        args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/rmnistclassifier_trainer:{{workflow.parameters.version}}"]
        # NOTE(review): only /workspace is mounted in this step; confirm that
        # this /src working directory is intended.
        workingDir: /src/example-seldon/models/r_mnist/train/
        volumeMounts:
          - name: docker-config
            mountPath: "/root/.docker/"
          - name: workspace
            mountPath: /workspace
    # Despite the template name, this creates a plain batch/v1 Job owned by
    # the workflow and waits for it to succeed.
    - name: tfjob
      resource:  # indicates that this is a resource template
        action: create  # can be any kubectl action (e.g. create, delete, apply, patch)
        successCondition: status.succeeded == 1
        manifest: |  # put your kubernetes spec here
          apiVersion: "batch/v1"
          kind: "Job"
          metadata:
            name: "r-train"
            ownerReferences:
              - apiVersion: argoproj.io/v1alpha1
                kind: Workflow
                controller: true
                name: {{workflow.name}}
                uid: {{workflow.uid}}
          spec:
            template:
              metadata:
                name: "r-train"
              spec:
                containers:
                  - image: "{{workflow.parameters.docker-org}}/rmnistclassifier_trainer:{{workflow.parameters.version}}"
                    name: "r-train"
                    volumeMounts:
                      - mountPath: "/data"
                        name: "persistent-storage"
                restartPolicy: "Never"
                volumes:
                  - name: "persistent-storage"
                    persistentVolumeClaim:
                      claimName: "nfs-1"

================================================
FILE: workflows/training-sk-mnist-workflow.yaml
================================================
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-sk-train-
spec:
  entrypoint: workflow
  arguments:
    # Values are quoted so YAML does not turn them into floats/booleans;
    # they are substituted as text wherever the parameter is referenced.
    parameters:
      - name: version
        value: "0.2"
      - name: github-user
        value: kubeflow
      - name: github-revision
        value: master
      - name: docker-org
        value: index.docker.io/seldonio
      - name: build-push-image
        value: "false"
  volumes:
    - name: docker-config
      secret:
        secretName: docker-config  # name of an existing k8s secret
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 0.5Gi
  templates:
    # Entry point: fetch the source, optionally build/push the image, then train.
    - name: workflow
      steps:
        - - name: get-source
            template: get-source-code
        - - name: build-push
            template: build-and-push
            when: "{{workflow.parameters.build-push-image}} == true"
        - - name: train
            template: tfjob
    # Clone the repo and copy the sklearn training files to the workspace volume.
    - name: get-source-code
      inputs:
        artifacts:
          - name: argo-source
            path: /src/example-seldon
            git:
              repo: "https://github.com/{{workflow.parameters.github-user}}/example-seldon.git"
              revision: "{{workflow.parameters.github-revision}}"
      container:
        image: alpine:latest
        command: [sh, -c]
        args: ["cp /src/example-seldon/models/sk_mnist/train/* /workspace/; ls /workspace/"]
        volumeMounts:
          - name: workspace
            mountPath: /workspace
    # Build the trainer image with kaniko and push it to docker-org.
    - name: build-and-push
      container:
        image: gcr.io/kaniko-project/executor:latest
        args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/skmnistclassifier_trainer:{{workflow.parameters.version}}"]
        # NOTE(review): only /workspace is mounted in this step; confirm that
        # this /src working directory is intended.
        workingDir: /src/example-seldon/models/sk_mnist/train/
        volumeMounts:
          - name: docker-config
            mountPath: "/root/.docker/"
          - name: workspace
            mountPath: /workspace
    # Despite the template name, this creates a plain batch/v1 Job owned by
    # the workflow and waits for it to succeed.
    - name: tfjob
      resource:  # indicates that this is a resource template
        action: create  # can be any kubectl action (e.g. create, delete, apply, patch)
        successCondition: status.succeeded == 1
        manifest: |  # put your kubernetes spec here
          apiVersion: "batch/v1"
          kind: "Job"
          metadata:
            name: "sk-train"
            ownerReferences:
              - apiVersion: argoproj.io/v1alpha1
                kind: Workflow
                controller: true
                name: {{workflow.name}}
                uid: {{workflow.uid}}
          spec:
            template:
              metadata:
                name: "sk-train"
              spec:
                containers:
                  - image: "{{workflow.parameters.docker-org}}/skmnistclassifier_trainer:{{workflow.parameters.version}}"
                    name: "sk-train"
                    imagePullPolicy: Always
                    volumeMounts:
                      - mountPath: "/data"
                        name: "persistent-storage"
                restartPolicy: "Never"
                volumes:
                  - name: "persistent-storage"
                    persistentVolumeClaim:
                      claimName: "nfs-1"

================================================
FILE: workflows/training-tf-mnist-workflow.md
================================================
# Example Argo Workflow to dockerize and Train Model

Comments on the [training-tf-mnist-workflow.yaml](training-tf-mnist-workflow.yaml)

## Workflow summary

To dockerize our model training and run it we create:

* [```models/tf_mnist/train/Dockerfile```](../models/tf_mnist/train/Dockerfile) from which the workflow builds an image for our Tensorflow training and pushes it to our repo.
* An Argo workflow [```workflows/training-tf-mnist-workflow.yaml```](training-tf-mnist-workflow.yaml) is created which:
    * Clones the project from github
    * Builds and pushes the training image (using the Kaniko executor)
    * Starts a kubeflow TfJob to train the model and save the results to the persistent volume

## Workflow parameters

* version
    * The version tag for the Docker image
* github-user
    * The github user/org for which to clone this repo/fork
* github-revision
    * The github revision to use for cloning the repo (can be a branch name)
* docker-org
    * The Docker host and org/user/project to use when pushing an image to the registry
* tfjob-version-hack
    * A temporary random integer for the tfjob ID
* build-push-image
    * Whether to build and push the image to docker registry (true/false)

## Setup For Pushing Images

**To push the Docker images to your own repo you will need to set up your docker credentials as a Kubernetes secret containing a [config.json](https://www.projectatomic.io/blog/2016/03/docker-credentials-store/). To do this you can find your docker home (typically ~/.docker) and run `kubectl create secret generic docker-config --from-file=config.json=${DOCKERHOME}/config.json --type=kubernetes.io/config` to [create a secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#registry-secret-existing-credentials).**

================================================
FILE: workflows/training-tf-mnist-workflow.yaml
================================================
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-tf-train-
spec:
  entrypoint: workflow
  arguments:
    # Values are quoted so YAML does not turn them into floats/ints/booleans;
    # they are substituted as text wherever the parameter is referenced.
    parameters:
      - name: version
        value: "0.1"
      - name: github-user
        value: kubeflow
      - name: github-revision
        value: master
      - name: docker-org
        value: index.docker.io/seldonio
      - name: tfjob-version-hack
        value: "1"
      - name: build-push-image
        value: "false"
  volumes:
    - name: docker-config
      secret:
        secretName: docker-config  # name of an existing k8s secret
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 0.5Gi
  templates:
    # Entry point: fetch the source, optionally build/push the image, then train.
    - name: workflow
      steps:
        - - name: get-source
            template: get-source-code
        - - name: build-push
            template: build-and-push
            when: "{{workflow.parameters.build-push-image}} == true"
        - - name: train
            template: tfjob
    # Clone the repo and copy the Tensorflow training files to the workspace volume.
    - name: get-source-code
      inputs:
        artifacts:
          - name: argo-source
            path: /src/example-seldon
            git:
              repo: "https://github.com/{{workflow.parameters.github-user}}/example-seldon.git"
              revision: "{{workflow.parameters.github-revision}}"
      container:
        image: alpine:latest
        command: [sh, -c]
        args: ["cp /src/example-seldon/models/tf_mnist/train/* /workspace/; ls /workspace/"]
        volumeMounts:
          - name: workspace
            mountPath: /workspace
    # Build the trainer image with kaniko and push it to docker-org.
    - name: build-and-push
      container:
        image: gcr.io/kaniko-project/executor:latest
        args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/deepmnistclassifier_trainer:{{workflow.parameters.version}}"]
        # NOTE(review): only /workspace is mounted in this step; confirm that
        # this /src working directory is intended.
        workingDir: /src/example-seldon/models/tf_mnist/train/
        volumeMounts:
          - name: docker-config
            mountPath: "/root/.docker/"
          - name: workspace
            mountPath: /workspace
    # Create a TFJob owned by the workflow and wait for its worker to succeed.
    # NOTE(review): the nesting level of the trailing tfReplicaType key could
    # not be recovered from the source; it is placed under Worker - confirm.
    - name: tfjob
      resource:  # indicates that this is a resource template
        action: create  # can be any kubectl action (e.g. create, delete, apply, patch)
        # successCondition: status.tfReplicaStatuses.Worker.succeeded == 1
        # successCondition: status.conditions.type == Succeeded
        successCondition: status.replicaStatuses.Worker.succeeded == 1
        manifest: |  # put your kubernetes spec here
          apiVersion: "kubeflow.org/v1beta1"
          kind: "TFJob"
          metadata:
            name: mnist-train-{{workflow.parameters.tfjob-version-hack}}
            ownerReferences:
              - apiVersion: argoproj.io/v1alpha1
                kind: Workflow
                controller: true
                name: {{workflow.name}}
                uid: {{workflow.uid}}
          spec:
            tfReplicaSpecs:
              Worker:
                replicas: 1
                template:
                  spec:
                    containers:
                      - image: "{{workflow.parameters.docker-org}}/deepmnistclassifier_trainer:{{workflow.parameters.version}}"
                        name: "tensorflow"
                        volumeMounts:
                          - mountPath: "/data"
                            name: "persistent-storage"
                    restartPolicy: "OnFailure"
                    volumes:
                      - name: "persistent-storage"
                        persistentVolumeClaim:
                          claimName: "nfs-1"
                tfReplicaType: "MASTER"