Repository: intro-to-ml-with-kubeflow/intro-to-ml-with-kubeflow-examples
Branch: master
Commit: b00b44a88011
Files: 147
Total size: 501.5 KB
Directory structure:
gitextract_v2ir0_h2/
├── .circleci/
│   └── config.yml
├── .gitignore
├── .travis.yaml
├── LICENSE
├── README.md
├── autopep_stuff.sh
├── ch03/
│   ├── example_secret.yaml
│   ├── linux_install.sh
│   ├── mac_install.sh
│   └── minio.sh
├── ch04/
│   ├── code/
│   │   ├── ControlStructures.ipynb
│   │   ├── ControlStructures.py
│   │   ├── Lightweight Pipeline.ipynb
│   │   ├── Lightweight Pipeline.py
│   │   ├── RecommenderPipeline.ipynb
│   │   ├── RecommenderPipeline.py
│   │   └── download_components.sh
│   └── install/
│       ├── deployment.yaml
│       └── virtualservice.yaml
├── ch06/
│   ├── MLflow.ipynb
│   ├── MLflow.py
│   ├── Metadata.ipynb
│   ├── Metadata.py
│   ├── docker/
│   │   ├── Dockerfile
│   │   ├── build.sh
│   │   └── run.sh
│   └── install/
│       └── mlflowchart/
│           ├── .helmignore
│           ├── Chart.yaml
│           ├── templates/
│           │   ├── NOTES.txt
│           │   ├── _helpers.tpl
│           │   └── mlflow.yaml
│           └── values.yaml
├── ch10/
│   ├── experiment.yaml
│   ├── hptuning.py
│   └── random.yaml
├── ch2/
│   ├── Dockerfile
│   ├── build-and-push.sh
│   └── query-endpoint.py
├── ch2_seldon_examples/
│   ├── pipeline_role.yaml
│   ├── pipeline_rolebinding.yaml
│   ├── pv-claim.yaml
│   ├── pv-volume.yaml
│   ├── request_example.ipynb
│   ├── run_example.sh
│   ├── setup_example.sh
│   ├── tf_mnist_no_seldon_pipeline.py
│   ├── tiller_rbac.yaml
│   └── train_pipeline.py
├── ch9/
│   └── ctscans/
│       ├── DICOM Denoising Pipeline.ipynb
│       ├── calculate-basis-vectors/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   ├── pom.xml
│       │   └── src/
│       │       └── main/
│       │           └── scala/
│       │               └── org/
│       │                   └── rawkintrevo/
│       │                       └── covid/
│       │                           └── App.scala
│       ├── download-dicom/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   └── run.sh
│       ├── process-dicoms-into-vectors/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   ├── data/
│       │   │   └── s.150.csv
│       │   ├── process-dicoms-into-vectors.yaml
│       │   └── src/
│       │       └── program.py
│       └── visualize-basis-vectors/
│           ├── Dockerfile
│           ├── build-component.sh
│           └── src/
│               └── program.py
├── ci.sh
├── convert_notebooks.sh
├── data-extraction/
│   ├── README.md
│   ├── github_comments_query.bsql
│   ├── github_issues_query.bsql
│   ├── iot/
│   │   ├── basic.yaml
│   │   └── build.sh
│   ├── python-notebook/
│   │   ├── AddSpamassassinDockerfile
│   │   ├── MailingListDataPrep.ipynb
│   │   ├── MailingListDataPrep.py
│   │   └── RunNBDockerfile
│   ├── python-spark/
│   │   ├── Dockerfile
│   │   ├── LaunchSparkJobs.ipynb
│   │   ├── LaunchSparkJobs.py
│   │   ├── fake_job.py
│   │   └── requirements.txt
│   ├── python-spark-notebook/
│   │   ├── AddGCSDockerfile
│   │   ├── AddPython3.6Dockerfile
│   │   ├── Dockerfile
│   │   ├── SparkMailingListForKF.ipynb
│   │   ├── SparkMailingListForKF.py
│   │   ├── build.sh
│   │   ├── dr.yaml
│   │   ├── no-saprk-tls.yaml
│   │   ├── spark-driver-service.yaml
│   │   └── virt_service.yaml
│   ├── spark-hello-world/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── hello_world_pipeline.py
│   │   ├── lr_demo/
│   │   │   ├── .gitignore
│   │   │   ├── .travis.yml
│   │   │   ├── README.md
│   │   │   ├── build.sbt
│   │   │   ├── project/
│   │   │   │   ├── build.properties
│   │   │   │   └── plugins.sbt
│   │   │   ├── sample.csv
│   │   │   ├── sbt/
│   │   │   │   └── sbt
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   └── scala/
│   │   │       │       └── com/
│   │   │       │           └── introtomlwithkubeflow/
│   │   │       │               └── spark/
│   │   │       │                   └── demo/
│   │   │       │                       └── lr/
│   │   │       │                           ├── TrainingApp.scala
│   │   │       │                           └── TrainingPipeline.scala
│   │   │       └── test/
│   │   │           └── scala/
│   │   │               └── com/
│   │   │                   └── introtomlwithkubeflow/
│   │   │                       └── spark/
│   │   │                           └── demo/
│   │   │                               └── lr/
│   │   │                                   └── TrainingPipelineTest.scala
│   │   ├── setup.sh
│   │   ├── spark-pi-min.yaml
│   │   └── spark-pi.yaml
│   ├── stack_overflow_questions.bsql
│   └── tfx/
│       ├── TFDV.ipynb
│       ├── TFDV.py
│       ├── install_tfx.sh
│       ├── requirements.txt
│       └── run_on_dataflow_ex.py
├── dev-setup/
│   ├── install-argo.sh
│   ├── install-kf-pipeline-sdk.sh
│   ├── install-kf.sh
│   ├── install-kubectl.sh
│   ├── install-kustomize.sh
│   ├── install-microk8s.sh
│   └── jsonnet.sh
├── feature-prep/
│   ├── README.md
│   ├── spark/
│   │   ├── SparkMailingListFeaturePrep.ipynb
│   │   └── SparkMailingListFeaturePrep.py
│   └── tft/
│       ├── requirements.txt
│       └── transform.py
├── gcp-setup/
│   ├── cloudshell_scrip.sh
│   └── setup-gcp.sh
├── kfctl_gcp_iap.v1.0.1.yaml
├── pipelines/
│   ├── ControlStructures.ipynb
│   ├── Lightweight Pipeline.ipynb
│   ├── RecommenderPipeline.ipynb
│   └── download_components.sh
├── recommender/
│   ├── Dockerfile
│   ├── Recommender_Kubeflow.ipynb
│   ├── Recommender_Kubeflow.py
│   ├── docker/
│   │   ├── Dockerfile
│   │   └── build.sh
│   └── tfservingchart/
│       ├── .helmignore
│       ├── Chart.yaml
│       ├── templates/
│       │   ├── NOTES.txt
│       │   ├── _helpers.tpl
│       │   ├── minioaccess.yaml
│       │   ├── tfserving.yaml
│       │   └── tfserving1.yaml
│       └── values.yaml
├── runthrough.sh
└── scikitLearn/
    └── python/
        └── IncomePrediction.ipynb
================================================
FILE CONTENTS
================================================
================================================
FILE: .circleci/config.yml
================================================
version: 2
apt-run: &apt-install
  name: Install apt packages
  command: |
    sudo apt-get -qq update
    sudo apt-get install -y \
      shellcheck
jobs:
  build:
    working_directory: ~/mermaid-starter
    docker:
      - image: circleci/python:3.6-jessie-node-browsers-legacy
    steps:
      - checkout
      - run: *apt-install
      - run:
          name: Run our basic shell CI
          command: ./ci.sh
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.idea
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# Emacs
*~
# Ignore kfctl's downloaded tarballs
kfctl*.t*z
================================================
FILE: .travis.yaml
================================================
language: generic
sudo: true
addons:
  apt:
    packages:
      - shellcheck
script:
  - ./ci.sh
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# intro-to-ml-with-kubeflow-examples
Examples for the Intro to ML with Kubeflow book
================================================
FILE: autopep_stuff.sh
================================================
#!/bin/bash
# autopep8 a bunch of things that we can
autopep8 -i -r ./ \
--select E101,E202,E201,E203,E211,E221,E222,E223,E224,E225,E226,E227,\
E228,E231,E241,E242,E251,E252,E262,E271,E272,E273,E274,E301,E302,E303,\
E304,E305,E306,E501,E502,E711,E712,E713,E714,E721,E722,E731,W291,W293,\
W391,W601,W602,W603,W604,W690 \
-j 0 --exclude "*venv*"
# Then we use YAPF because it does a better job on long-lines
yapf -i -r ./ --exclude "*venv*"
================================================
FILE: ch03/example_secret.yaml
================================================
apiVersion: v1
kind: Secret
metadata:
  name: minioaccess
  namespace: mynamespace
data:
  AWS_ACCESS_KEY_ID: xxxxxxxxxx
  AWS_SECRET_ACCESS_KEY: xxxxxxxxxxxxxxxxxxxxx
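Note: the xxxxx placeholders above stand for base64-encoded credentials, since Kubernetes Secret `data` values must be base64-encoded. A minimal Python sketch (not part of the repo) for generating them, assuming the minio/minio123 defaults used elsewhere in these examples:

#!/usr/bin/env python
# Hypothetical helper: base64-encode MinIO credentials for the Secret above.
import base64

access_key = "minio"      # assumed default; substitute your real key
secret_key = "minio123"   # assumed default; substitute your real secret

print("AWS_ACCESS_KEY_ID:", base64.b64encode(access_key.encode()).decode())
print("AWS_SECRET_ACCESS_KEY:", base64.b64encode(secret_key.encode()).decode())

The resulting values go into the data block, and the secret is created with kubectl apply -f example_secret.yaml.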
================================================
FILE: ch03/linux_install.sh
================================================
#!/bin/bash
#tag::installMCLinux[]
pushd ~/bin
wget https://dl.min.io/client/mc/release/linux-amd64/mc
chmod a+x mc
#end::installMCLinux[]
================================================
FILE: ch03/mac_install.sh
================================================
#!/bin/bash
#tag::installMCMac[]
brew install minio/stable/minio
#end::installMCMac[]
================================================
FILE: ch03/minio.sh
================================================
#!/bin/bash
set -ex
# Minio runs on port 9000 (both UI and service) so expose locally to use cli or UI
#tag::fwdMinio[]
kubectl port-forward -n kubeflow svc/minio-service 9000:9000 &
#end::fwdMinio[]
# Give it a spell to settle
sleep 10
# Kubeflow creates a minio user with password minio123 at install
#tag::configMC[]
mc config host add minio http://localhost:9000 minio minio123
#end::configMC[]
#tag::listMC[]
mc ls minio
#end::listMC[]
# Output [2018-12-13 18:23:41 CST] 0B mlpipeline/
# Make a new bucket for our work
#tag::makeBucket[]
mc mb minio/kf-book-examples
#end::makeBucket[]
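For working with the same MinIO instance from Python instead of the mc CLI, here is a short sketch (not part of the repo) using the minio client package; it assumes the port-forward started above is still running and uses the same minio/minio123 defaults:

# Hypothetical companion to minio.sh: talk to MinIO from Python.
# pip install minio
from minio import Minio

# localhost:9000 works because of the `kubectl port-forward` above.
client = Minio("localhost:9000",
               access_key="minio",
               secret_key="minio123",
               secure=False)

# Equivalent of `mc mb minio/kf-book-examples` and `mc ls minio`:
if not client.bucket_exists("kf-book-examples"):
    client.make_bucket("kf-book-examples")
for bucket in client.list_buckets():
    print(bucket.name)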
================================================
FILE: ch04/code/ControlStructures.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Simple Control structure\n",
"\n",
"Shows how to use conditional execution"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already up-to-date: kfp in ./.local/lib/python3.6/site-packages (0.2.2.1)\n",
"Requirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from kfp) (2.8.1)\n",
"Requirement already satisfied, skipping upgrade: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: argo-models==2.2.1a in ./.local/lib/python3.6/site-packages (from kfp) (2.2.1a0)\n",
"Requirement already satisfied, skipping upgrade: urllib3<1.25,>=1.15 in ./.local/lib/python3.6/site-packages (from kfp) (1.24.3)\n",
"Requirement already satisfied, skipping upgrade: cloudpickle==1.1.1 in ./.local/lib/python3.6/site-packages (from kfp) (1.1.1)\n",
"Requirement already satisfied, skipping upgrade: kubernetes<=10.0.0,>=8.0.0 in ./.local/lib/python3.6/site-packages (from kfp) (10.0.0)\n",
"Requirement already satisfied, skipping upgrade: click==7.0 in ./.local/lib/python3.6/site-packages (from kfp) (7.0)\n",
"Requirement already satisfied, skipping upgrade: certifi in /usr/local/lib/python3.6/dist-packages (from kfp) (2019.11.28)\n",
"Requirement already satisfied, skipping upgrade: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
"Requirement already satisfied, skipping upgrade: kfp-server-api<=0.1.40,>=0.1.18 in ./.local/lib/python3.6/site-packages (from kfp) (0.1.40)\n",
"Requirement already satisfied, skipping upgrade: six>=1.10 in /usr/lib/python3/dist-packages (from kfp) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: PyJWT>=1.6.4 in ./.local/lib/python3.6/site-packages (from kfp) (1.7.1)\n",
"Requirement already satisfied, skipping upgrade: Deprecated in ./.local/lib/python3.6/site-packages (from kfp) (1.2.7)\n",
"Requirement already satisfied, skipping upgrade: requests-toolbelt>=0.8.0 in ./.local/lib/python3.6/site-packages (from kfp) (0.9.1)\n",
"Requirement already satisfied, skipping upgrade: cryptography>=2.4.2 in ./.local/lib/python3.6/site-packages (from kfp) (2.8)\n",
"Requirement already satisfied, skipping upgrade: tabulate==0.8.3 in ./.local/lib/python3.6/site-packages (from kfp) (0.8.3)\n",
"Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
"Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
"Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
"Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (45.1.0)\n",
"Requirement already satisfied, skipping upgrade: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
"Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
"Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
"Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
"Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (1.3.0)\n",
"Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (2.22.0)\n",
"Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (0.57.0)\n",
"Requirement already satisfied, skipping upgrade: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
"Requirement already satisfied, skipping upgrade: cffi!=1.11.3,>=1.8 in ./.local/lib/python3.6/site-packages (from cryptography>=2.4.2->kfp) (1.14.0)\n",
"Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
"Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
"Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.6.1->kfp) (0.4.8)\n",
"Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<=10.0.0,>=8.0.0->kfp) (3.1.0)\n",
"Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes<=10.0.0,>=8.0.0->kfp) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes<=10.0.0,>=8.0.0->kfp) (2.6)\n",
"Requirement already satisfied, skipping upgrade: pycparser in ./.local/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography>=2.4.2->kfp) (2.19)\n",
"Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n",
"Requirement already satisfied, skipping upgrade: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
"Requirement already satisfied, skipping upgrade: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n"
]
}
],
"source": [
"!pip install kfp --upgrade --user\n",
"\n",
"import kfp\n",
"from kfp import dsl\n",
"from kfp.components import func_to_container_op, InputPath, OutputPath"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Functions"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"@func_to_container_op\n",
"def get_random_int_op(minimum: int, maximum: int) -> int:\n",
" \"\"\"Generate a random number between minimum and maximum (inclusive).\"\"\"\n",
" import random\n",
" result = random.randint(minimum, maximum)\n",
" print(result)\n",
" return result\n",
"\n",
"@func_to_container_op\n",
"def process_small_op(data: int):\n",
" \"\"\"Process small numbers.\"\"\"\n",
" print(\"Processing small result\", data)\n",
" return\n",
"\n",
"@func_to_container_op\n",
"def process_medium_op(data: int):\n",
" \"\"\"Process medium numbers.\"\"\"\n",
" print(\"Processing medium result\", data)\n",
" return\n",
"\n",
"@func_to_container_op\n",
"def process_large_op(data: int):\n",
" \"\"\"Process large numbers.\"\"\"\n",
" print(\"Processing large result\", data)\n",
" return"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Conditional pipeline"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"@dsl.pipeline(\n",
" name='Conditional execution pipeline',\n",
" description='Shows how to use dsl.Condition().'\n",
")\n",
"def conditional_pipeline():\n",
" number = get_random_int_op(0, 100).output\n",
" with dsl.Condition(number < 10):\n",
" process_small_op(number)\n",
" with dsl.Condition(number > 10 and number < 50):\n",
" process_medium_op(number)\n",
" with dsl.Condition(number > 50):\n",
" process_large_op(number)\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Submit the pipeline for execution:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Experiment link <a href=\"/pipeline/#/experiments/details/2abe16d1-fa2e-4f49-a3a5-acad8d36790d\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run link <a href=\"/pipeline/#/runs/details/293a92c5-50b2-4a96-bbd4-ebc85106f337\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"RunPipelineResult(run_id=293a92c5-50b2-4a96-bbd4-ebc85106f337)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"kfp.Client().create_run_from_pipeline_func(conditional_pipeline, arguments={})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: ch04/code/ControlStructures.py
================================================
#!/usr/bin/env python
# coding: utf-8

# # Simple Control structure
#
# Shows how to use conditional execution

# In[1]:

get_ipython().system('pip install kfp --upgrade --user')

import kfp
from kfp import dsl
from kfp.components import func_to_container_op, InputPath, OutputPath

# # Functions

# In[2]:

@func_to_container_op
def get_random_int_op(minimum: int, maximum: int) -> int:
    """Generate a random number between minimum and maximum (inclusive)."""
    import random
    result = random.randint(minimum, maximum)
    print(result)
    return result

@func_to_container_op
def process_small_op(data: int):
    """Process small numbers."""
    print("Processing small result", data)
    return

@func_to_container_op
def process_medium_op(data: int):
    """Process medium numbers."""
    print("Processing medium result", data)
    return

@func_to_container_op
def process_large_op(data: int):
    """Process large numbers."""
    print("Processing large result", data)
    return

# # Conditional pipeline

# In[3]:

@dsl.pipeline(name='Conditional execution pipeline',
              description='Shows how to use dsl.Condition().')
def conditional_pipeline():
    number = get_random_int_op(0, 100).output
    with dsl.Condition(number < 10):
        process_small_op(number)
    # Caution: Python's `and` cannot be overloaded for pipeline conditions,
    # so only one of the two comparisons here survives compilation; nested
    # dsl.Condition blocks are the safe way to express 10 < number < 50.
    with dsl.Condition(number > 10 and number < 50):
        process_medium_op(number)
    with dsl.Condition(number > 50):
        process_large_op(number)

# # Submit the pipeline for execution:

# In[4]:

kfp.Client().create_run_from_pipeline_func(conditional_pipeline, arguments={})

# In[ ]:
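As flagged in the comment above, a range condition is more robustly expressed by nesting two single-comparison conditions. A minimal sketch (not from the book's code; the pipeline name is hypothetical) of the medium branch rewritten this way:

import kfp
from kfp import dsl
from kfp.components import func_to_container_op

@func_to_container_op
def get_random_int_op(minimum: int, maximum: int) -> int:
    """Generate a random number between minimum and maximum (inclusive)."""
    import random
    return random.randint(minimum, maximum)

@func_to_container_op
def process_medium_op(data: int):
    """Process medium numbers."""
    print("Processing medium result", data)

@dsl.pipeline(name='Range condition pipeline',
              description='Nested dsl.Condition blocks emulate 10 < number < 50.')
def range_pipeline():
    number = get_random_int_op(0, 100).output
    # Each dsl.Condition holds a single comparison; nesting ANDs them at runtime.
    with dsl.Condition(number > 10):
        with dsl.Condition(number < 50):
            process_medium_op(number)

kfp.Client().create_run_from_pipeline_func(range_pipeline, arguments={})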
================================================
FILE: ch04/code/Lightweight Pipeline.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already up-to-date: kfp in ./.local/lib/python3.6/site-packages (0.2.2.1)\n",
"Requirement already satisfied, skipping upgrade: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
"Requirement already satisfied, skipping upgrade: requests-toolbelt>=0.8.0 in ./.local/lib/python3.6/site-packages (from kfp) (0.9.1)\n",
"Requirement already satisfied, skipping upgrade: click==7.0 in ./.local/lib/python3.6/site-packages (from kfp) (7.0)\n",
"Requirement already satisfied, skipping upgrade: Deprecated in ./.local/lib/python3.6/site-packages (from kfp) (1.2.7)\n",
"Requirement already satisfied, skipping upgrade: kubernetes<=10.0.0,>=8.0.0 in ./.local/lib/python3.6/site-packages (from kfp) (10.0.0)\n",
"Requirement already satisfied, skipping upgrade: six>=1.10 in /usr/lib/python3/dist-packages (from kfp) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: certifi in /usr/local/lib/python3.6/dist-packages (from kfp) (2019.11.28)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
"Requirement already satisfied, skipping upgrade: PyJWT>=1.6.4 in ./.local/lib/python3.6/site-packages (from kfp) (1.7.1)\n",
"Requirement already satisfied, skipping upgrade: cryptography>=2.4.2 in ./.local/lib/python3.6/site-packages (from kfp) (2.8)\n",
"Requirement already satisfied, skipping upgrade: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: cloudpickle==1.1.1 in ./.local/lib/python3.6/site-packages (from kfp) (1.1.1)\n",
"Requirement already satisfied, skipping upgrade: kfp-server-api<=0.1.40,>=0.1.18 in ./.local/lib/python3.6/site-packages (from kfp) (0.1.40)\n",
"Requirement already satisfied, skipping upgrade: argo-models==2.2.1a in ./.local/lib/python3.6/site-packages (from kfp) (2.2.1a0)\n",
"Requirement already satisfied, skipping upgrade: tabulate==0.8.3 in ./.local/lib/python3.6/site-packages (from kfp) (0.8.3)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from kfp) (2.8.1)\n",
"Requirement already satisfied, skipping upgrade: urllib3<1.25,>=1.15 in ./.local/lib/python3.6/site-packages (from kfp) (1.24.3)\n",
"Requirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
"Requirement already satisfied, skipping upgrade: requests<3.0.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from requests-toolbelt>=0.8.0->kfp) (2.22.0)\n",
"Requirement already satisfied, skipping upgrade: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
"Requirement already satisfied, skipping upgrade: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (45.1.0)\n",
"Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (0.57.0)\n",
"Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (1.3.0)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
"Requirement already satisfied, skipping upgrade: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
"Requirement already satisfied, skipping upgrade: cffi!=1.11.3,>=1.8 in ./.local/lib/python3.6/site-packages (from cryptography>=2.4.2->kfp) (1.14.0)\n",
"Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
"Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
"Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
"Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
"Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
"Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
"Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (2.6)\n",
"Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<=10.0.0,>=8.0.0->kfp) (3.1.0)\n",
"Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
"Requirement already satisfied, skipping upgrade: pycparser in ./.local/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography>=2.4.2->kfp) (2.19)\n",
"Requirement already satisfied, skipping upgrade: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.6.1->kfp) (0.4.8)\n",
"Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
"Requirement already satisfied, skipping upgrade: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n",
"Requirement already satisfied, skipping upgrade: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
"Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n"
]
}
],
"source": [
"!pip install kfp --upgrade --user\n",
"\n",
"import kfp \n",
"from kfp import compiler\n",
"import kfp.dsl as dsl\n",
"import kfp.notebook\n",
"import kfp.components as comp\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Simple function that just add two numbers:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#Define a Python function\n",
"def add(a: float, b: float) -> float:\n",
" '''Calculates sum of two arguments'''\n",
" return a + b"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert the function to a pipeline operation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"add_op = comp.func_to_container_op(add)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A bit more advanced function which demonstrates how to use imports, helper functions and produce multiple outputs."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from typing import NamedTuple\n",
"def my_divmod(dividend: float, divisor:float) -> NamedTuple('MyDivmodOutput', [('quotient', float), ('remainder', float)]):\n",
" '''Divides two numbers and calculate the quotient and remainder'''\n",
" #Imports inside a component function:\n",
" import numpy as np\n",
"\n",
" #This function demonstrates how to use nested functions inside a component function:\n",
" def divmod_helper(dividend, divisor):\n",
" return np.divmod(dividend, divisor)\n",
"\n",
" (quotient, remainder) = divmod_helper(dividend, divisor)\n",
"\n",
" from collections import namedtuple\n",
" divmod_output = namedtuple('MyDivmodOutput', ['quotient', 'remainder'])\n",
" return divmod_output(quotient, remainder)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Test running the python function directly"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MyDivmodOutput(quotient=14, remainder=2)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"my_divmod(100, 7)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert the function to a pipeline operation"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"divmod_op = comp.func_to_container_op(my_divmod, base_image='tensorflow/tensorflow:1.14.0-py3')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define the pipeline\n",
"Pipeline function has to be decorated with the @dsl.pipeline decorator"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"@dsl.pipeline(\n",
" name='Calculation pipeline',\n",
" description='A toy pipeline that performs arithmetic calculations.'\n",
")\n",
"def calc_pipeline(\n",
" a='a',\n",
" b='7',\n",
" c='17',\n",
"):\n",
" #Passing pipeline parameter and a constant value as operation arguments\n",
" add_task = add_op(a, 4) #Returns a dsl.ContainerOp class instance. \n",
" \n",
" #Passing a task output reference as operation arguments\n",
" #For an operation with a single return value, the output reference can be accessed using `task.output` or `task.outputs['output_name']` syntax\n",
" divmod_task = divmod_op(add_task.output, b)\n",
"\n",
" #For an operation with a multiple return values, the output references can be accessed using `task.outputs['output_name']` syntax\n",
" result_task = add_op(divmod_task.outputs['quotient'], c)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Submit the pipeline for execution"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Experiment link <a href=\"/pipeline/#/experiments/details/2abe16d1-fa2e-4f49-a3a5-acad8d36790d\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run link <a href=\"/pipeline/#/runs/details/87276776-0c3a-4d4e-99d0-4563b7f42fa5\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"RunPipelineResult(run_id=87276776-0c3a-4d4e-99d0-4563b7f42fa5)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client = kfp.Client()\n",
"\n",
"#Specify pipeline argument values\n",
"arguments = {'a': '7', 'b': '8'}\n",
"\n",
"#Submit a pipeline run\n",
"client.create_run_from_pipeline_func(calc_pipeline, arguments=arguments)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: ch04/code/Lightweight Pipeline.py
================================================
#!/usr/bin/env python
# coding: utf-8

# # Setup

# In[1]:

get_ipython().system('pip install kfp --upgrade --user')

import kfp
from kfp import compiler
import kfp.dsl as dsl
import kfp.notebook
import kfp.components as comp

# Simple function that just adds two numbers:

# In[2]:

# Define a Python function
def add(a: float, b: float) -> float:
    '''Calculates sum of two arguments'''
    return a + b

# Convert the function to a pipeline operation

# In[3]:

add_op = comp.func_to_container_op(add)

# A slightly more advanced function that demonstrates how to use imports and
# helper functions, and how to produce multiple outputs.

# In[4]:

from typing import NamedTuple

def my_divmod(
    dividend: float, divisor: float
) -> NamedTuple('MyDivmodOutput', [('quotient', float), ('remainder', float)]):
    '''Divides two numbers and calculates the quotient and remainder'''
    # Imports inside a component function:
    import numpy as np

    # This function demonstrates how to use nested functions inside a component function:
    def divmod_helper(dividend, divisor):
        return np.divmod(dividend, divisor)

    (quotient, remainder) = divmod_helper(dividend, divisor)

    from collections import namedtuple
    divmod_output = namedtuple('MyDivmodOutput', ['quotient', 'remainder'])
    return divmod_output(quotient, remainder)

# Test running the python function directly

# In[5]:

my_divmod(100, 7)

# Convert the function to a pipeline operation

# In[6]:

divmod_op = comp.func_to_container_op(
    my_divmod, base_image='tensorflow/tensorflow:1.14.0-py3')

# Define the pipeline
# The pipeline function must be decorated with the @dsl.pipeline decorator

# In[7]:

@dsl.pipeline(
    name='Calculation pipeline',
    description='A toy pipeline that performs arithmetic calculations.')
def calc_pipeline(
    a='a',
    b='7',
    c='17',
):
    # Passing pipeline parameter and a constant value as operation arguments
    add_task = add_op(a, 4)  # Returns a dsl.ContainerOp class instance.

    # Passing a task output reference as operation arguments
    # For an operation with a single return value, the output reference can be
    # accessed using `task.output` or `task.outputs['output_name']` syntax
    divmod_task = divmod_op(add_task.output, b)

    # For an operation with multiple return values, the output references can
    # be accessed using `task.outputs['output_name']` syntax
    result_task = add_op(divmod_task.outputs['quotient'], c)

# Submit the pipeline for execution

# In[8]:

client = kfp.Client()

# Specify pipeline argument values
arguments = {'a': '7', 'b': '8'}

# Submit a pipeline run
client.create_run_from_pipeline_func(calc_pipeline, arguments=arguments)

# In[ ]:
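The lightweight components above exist only for the duration of the notebook session. A short sketch (not from the book's code; the file name is hypothetical) of how the same function could be saved as a reusable component spec and loaded back later:

import kfp.components as comp

def add(a: float, b: float) -> float:
    '''Calculates sum of two arguments'''
    return a + b

# Write a component spec next to the notebook (hypothetical file name).
comp.func_to_container_op(add, output_component_file='add_component.yaml')

# Later, or from another notebook, rebuild the op from that file:
add_op = comp.load_component_from_file('add_component.yaml')

This separates component definition from pipeline assembly, so other pipelines can reuse the component without importing this notebook.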
================================================
FILE: ch04/code/RecommenderPipeline.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Kubeflow pipeline\n",
"This is a fairly simple pipeline, containing sequential steps:\n",
"\n",
"1. Update data - implemented by lightbend/recommender-data-update-publisher:0.2 image\n",
"2. Run model training. Ideally we would run TFJob, but due to the current limitations for pipelines, we will directly use an image implementing training lightbend/ml-tf-recommender:0.1\n",
"3. Update serving model - implemented by lightbend/recommender-model-publisher:0.2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already up-to-date: kubernetes in ./.local/lib/python3.6/site-packages (10.0.1)\n",
"Requirement already satisfied, skipping upgrade: pyyaml>=3.12 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (5.3)\n",
"Requirement already satisfied, skipping upgrade: six>=1.9.0 in /usr/lib/python3/dist-packages (from kubernetes) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: urllib3>=1.24.2 in ./.local/lib/python3.6/site-packages (from kubernetes) (1.24.3)\n",
"Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes) (1.3.0)\n",
"Requirement already satisfied, skipping upgrade: certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (2019.11.28)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (2.8.1)\n",
"Requirement already satisfied, skipping upgrade: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (45.1.0)\n",
"Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes) (2.22.0)\n",
"Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (0.57.0)\n",
"Requirement already satisfied, skipping upgrade: google-auth>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes) (3.1.0)\n",
"Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes) (2.6)\n",
"Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes) (0.2.8)\n",
"Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes) (4.0.0)\n",
"Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes) (4.0)\n",
"Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes) (0.4.8)\n",
"Requirement already up-to-date: kfp in ./.local/lib/python3.6/site-packages (0.2.2.1)\n",
"Requirement already satisfied, skipping upgrade: PyJWT>=1.6.4 in ./.local/lib/python3.6/site-packages (from kfp) (1.7.1)\n",
"Requirement already satisfied, skipping upgrade: requests-toolbelt>=0.8.0 in ./.local/lib/python3.6/site-packages (from kfp) (0.9.1)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from kfp) (2.8.1)\n",
"Requirement already satisfied, skipping upgrade: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
"Requirement already satisfied, skipping upgrade: kfp-server-api<=0.1.40,>=0.1.18 in ./.local/lib/python3.6/site-packages (from kfp) (0.1.40)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
"Requirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
"Requirement already satisfied, skipping upgrade: Deprecated in ./.local/lib/python3.6/site-packages (from kfp) (1.2.7)\n",
"Requirement already satisfied, skipping upgrade: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
"Collecting kubernetes<=10.0.0,>=8.0.0\n",
" Using cached kubernetes-10.0.0-py2.py3-none-any.whl (1.5 MB)\n",
"Requirement already satisfied, skipping upgrade: argo-models==2.2.1a in ./.local/lib/python3.6/site-packages (from kfp) (2.2.1a0)\n",
"Requirement already satisfied, skipping upgrade: urllib3<1.25,>=1.15 in ./.local/lib/python3.6/site-packages (from kfp) (1.24.3)\n",
"Requirement already satisfied, skipping upgrade: certifi in /usr/local/lib/python3.6/dist-packages (from kfp) (2019.11.28)\n",
"Requirement already satisfied, skipping upgrade: tabulate==0.8.3 in ./.local/lib/python3.6/site-packages (from kfp) (0.8.3)\n",
"Requirement already satisfied, skipping upgrade: click==7.0 in ./.local/lib/python3.6/site-packages (from kfp) (7.0)\n",
"Requirement already satisfied, skipping upgrade: cloudpickle==1.1.1 in ./.local/lib/python3.6/site-packages (from kfp) (1.1.1)\n",
"Requirement already satisfied, skipping upgrade: six>=1.10 in /usr/lib/python3/dist-packages (from kfp) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: cryptography>=2.4.2 in ./.local/lib/python3.6/site-packages (from kfp) (2.8)\n",
"Requirement already satisfied, skipping upgrade: requests<3.0.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from requests-toolbelt>=0.8.0->kfp) (2.22.0)\n",
"Requirement already satisfied, skipping upgrade: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
"Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (45.1.0)\n",
"Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
"Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
"Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
"Requirement already satisfied, skipping upgrade: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
"Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
"Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
"Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
"Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (0.57.0)\n",
"Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (1.3.0)\n",
"Requirement already satisfied, skipping upgrade: cffi!=1.11.3,>=1.8 in ./.local/lib/python3.6/site-packages (from cryptography>=2.4.2->kfp) (1.14.0)\n",
"Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (2.6)\n",
"Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
"Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
"Requirement already satisfied, skipping upgrade: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.6.1->kfp) (0.4.8)\n",
"Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<=10.0.0,>=8.0.0->kfp) (3.1.0)\n",
"Requirement already satisfied, skipping upgrade: pycparser in ./.local/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography>=2.4.2->kfp) (2.19)\n",
"Requirement already satisfied, skipping upgrade: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n",
"Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n",
"Requirement already satisfied, skipping upgrade: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
"Installing collected packages: kubernetes\n",
" Attempting uninstall: kubernetes\n",
" Found existing installation: kubernetes 10.0.1\n",
" Uninstalling kubernetes-10.0.1:\n",
" Successfully uninstalled kubernetes-10.0.1\n",
"Successfully installed kubernetes-10.0.0\n"
]
}
],
"source": [
"!pip install kubernetes --upgrade --user\n",
"!pip install kfp --upgrade --user\n",
"\n",
"\n",
"import kfp # the Pipelines SDK. This library is included with the notebook image.\n",
"from kfp import compiler\n",
"import kfp.dsl as dsl\n",
"import kfp.notebook\n",
"from kubernetes import client as k8s_client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create/Get an Experiment in the Kubeflow Pipeline System\n",
"The Kubeflow Pipeline system requires an \"Experiment\" to group pipeline runs. You can create a new experiment, or call client.list_experiments() to get existing ones."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"client = kfp.Client()\n",
"client.list_experiments()\n",
"#exp = client.create_experiment(name='mdupdate')\n",
"exp = client.get_experiment(experiment_name ='mdupdate')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Define a Pipeline\n",
"Authoring a pipeline is like authoring a normal Python function. The pipeline function describes the topology of the pipeline.\n",
"\n",
"Each step in the pipeline is typically a ContainerOp --- a simple class or function describing how to interact with a docker container image. In the pipeline, all the container images referenced in the pipeline are already built."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"@dsl.pipeline(\n",
" name='Recommender model update',\n",
" description='Demonstrate usage of pipelines for multi-step model update'\n",
")\n",
"def recommender_pipeline():\n",
" # Load new data\n",
" data = dsl.ContainerOp(\n",
" name='updatedata',\n",
" image='lightbend/recommender-data-update-publisher:0.2') \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL',value='http://minio-service.kubeflow.svc.cluster.local:9000')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123'))\n",
" # Train the model\n",
" train = dsl.ContainerOp(\n",
" name='trainmodel',\n",
" image='lightbend/ml-tf-recommender:0.1') \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL',value='minio-service.kubeflow.svc.cluster.local:9000')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123'))\n",
" train.after(data)\n",
" # Publish new model model\n",
" publish = dsl.ContainerOp(\n",
" name='publishmodel',\n",
" image='lightbend/recommender-model-publisher:0.2') \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL',value='http://minio-service.kubeflow.svc.cluster.local:9000')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='KAFKA_BROKERS', value='cloudflow-kafka-brokers.cloudflow.svc.cluster.local:9092')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='DEFAULT_RECOMMENDER_URL', value='http://recommendermodelserver.kubeflow.svc.cluster.local:8501')) \\\n",
" .add_env_variable(k8s_client.V1EnvVar(name='ALTERNATIVE_RECOMMENDER_URL', value='http://recommendermodelserver1.kubeflow.svc.cluster.local:8501'))\n",
" publish.after(train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compile pipeline"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"compiler.Compiler().compile(recommender_pipeline, 'pipeline.tar.gz')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Submit an experiment run"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Run link <a href=\"/pipeline/#/runs/details/df24284c-c7a1-480e-91b6-398bd352f164\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"run = client.run_pipeline(exp.id, 'pipeline1', 'pipeline.tar.gz')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: ch04/code/RecommenderPipeline.py
================================================
#!/usr/bin/env python
# coding: utf-8
# # Kubeflow pipeline
# This is a fairly simple pipeline, containing sequential steps:
#
# 1. Update data - implemented by lightbend/recommender-data-update-publisher:0.2 image
# 2. Run model training. Ideally we would use a TFJob, but due to current pipeline limitations we directly use an image implementing the training: lightbend/ml-tf-recommender:0.1
# 3. Update serving model - implemented by lightbend/recommender-model-publisher:0.2
# # Setup
# In[1]:
get_ipython().system('pip install kubernetes --upgrade --user')
get_ipython().system('pip install kfp --upgrade --user')
# the Pipelines SDK. This library is included with the notebook image.
import kfp
from kfp import compiler
import kfp.dsl as dsl
import kfp.notebook
from kubernetes import client as k8s_client
# # Create/Get an Experiment in the Kubeflow Pipeline System
# The Kubeflow Pipeline system requires an "Experiment" to group pipeline runs. You can create a new experiment, or call client.list_experiments() to get existing ones.
# In[3]:
client = kfp.Client()
client.list_experiments()
#exp = client.create_experiment(name='mdupdate')
exp = client.get_experiment(experiment_name='mdupdate')
# # Define a Pipeline
# Authoring a pipeline is like authoring a normal Python function. The pipeline function describes the topology of the pipeline.
#
# Each step in the pipeline is typically a ContainerOp --- a simple class or function describing how to interact with a Docker container image. All of the container images referenced in the pipeline are assumed to be already built.
# In[4]:
@dsl.pipeline(
name='Recommender model update',
description='Demonstrate usage of pipelines for multi-step model update')
def recommender_pipeline():
# Load new data
data = dsl.ContainerOp(
name='updatedata',
image='lightbend/recommender-data-update-publisher:0.2') \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL', value='http://minio-service.kubeflow.svc.cluster.local:9000')) \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123'))
# Train the model
train = dsl.ContainerOp(
name='trainmodel',
image='lightbend/ml-tf-recommender:0.1') \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL', value='minio-service.kubeflow.svc.cluster.local:9000')) \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123'))
train.after(data)
# Publish the new model
publish = dsl.ContainerOp(
name='publishmodel',
image='lightbend/recommender-model-publisher:0.2') \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL', value='http://minio-service.kubeflow.svc.cluster.local:9000')) \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \
.add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123')) \
.add_env_variable(k8s_client.V1EnvVar(name='KAFKA_BROKERS', value='cloudflow-kafka-brokers.cloudflow.svc.cluster.local:9092')) \
.add_env_variable(k8s_client.V1EnvVar(name='DEFAULT_RECOMMENDER_URL', value='http://recommendermodelserver.kubeflow.svc.cluster.local:8501')) \
.add_env_variable(k8s_client.V1EnvVar(name='ALTERNATIVE_RECOMMENDER_URL', value='http://recommendermodelserver1.kubeflow.svc.cluster.local:8501'))
publish.after(train)
# # Compile pipeline
# In[5]:
compiler.Compiler().compile(recommender_pipeline, 'pipeline.tar.gz')
# # Submit an experiment run
# In[6]:
run = client.run_pipeline(exp.id, 'pipeline1', 'pipeline.tar.gz')
# In[ ]:
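# The run submitted above executes asynchronously. Below is a minimal sketch
# of blocking until it finishes -- assuming the kfp 0.x client used here,
# whose wait_for_run_completion(run_id, timeout) method polls the run; the
# 600-second timeout and the `.id` attribute on the returned run object are
# illustrative assumptions, not part of the original notebook:
#
# result = client.wait_for_run_completion(run.id, timeout=600)
# print(result.run.status)  # 'Succeeded' once all three steps complete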
================================================
FILE: ch04/code/download_components.sh
================================================
#!/bin/bash
#tag::dlPipelineRelease[]
wget https://github.com/kubeflow/pipelines/archive/0.2.5.tar.gz
tar -xvf 0.2.5.tar.gz
#end::dlPipelineRelease[]
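# GitHub release tarballs unpack into a <repo>-<tag> directory, so the
# reusable component definitions from this archive should end up under
# pipelines-0.2.5/components/ (illustrative path -- verify after extraction):
# ls pipelines-0.2.5/components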
================================================
FILE: ch04/install/deployment.yaml
================================================
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: argo-ui
app.kubernetes.io/component: argo
app.kubernetes.io/instance: argo-v2.3.0
app.kubernetes.io/managed-by: kfctl
app.kubernetes.io/name: argo
app.kubernetes.io/part-of: kubeflow
app.kubernetes.io/version: v2.3.0
kustomize.component: argo
name: argo-ui
namespace: kubeflow
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app: argo-ui
app.kubernetes.io/component: argo
app.kubernetes.io/instance: argo-v2.3.0
app.kubernetes.io/managed-by: kfctl
app.kubernetes.io/name: argo
app.kubernetes.io/part-of: kubeflow
app.kubernetes.io/version: v2.3.0
kustomize.component: argo
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 25%
type: RollingUpdate
template:
metadata:
annotations:
sidecar.istio.io/inject: "false"
creationTimestamp: null
labels:
app: argo-ui
app.kubernetes.io/component: argo
app.kubernetes.io/instance: argo-v2.3.0
app.kubernetes.io/managed-by: kfctl
app.kubernetes.io/name: argo
app.kubernetes.io/part-of: kubeflow
app.kubernetes.io/version: v2.3.0
kustomize.component: argo
spec:
containers:
- env:
- name: ARGO_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: IN_CLUSTER
value: "true"
- name: ENABLE_WEB_CONSOLE
value: "true"
- name: BASE_HREF
value: /
image: argoproj/argoui:v2.3.0
imagePullPolicy: IfNotPresent
name: argo-ui
ports:
- containerPort: 8001
name: ui
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /
port: 8001
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccount: argo-ui
serviceAccountName: argo-ui
terminationGracePeriodSeconds: 30
================================================
FILE: ch04/install/virtualservice.yaml
================================================
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: argo-ui
namespace: kubeflow
spec:
gateways:
- kubeflow-gateway
hosts:
- '*'
http:
- match:
- uri:
prefix: /argo/
rewrite:
uri: /
route:
- destination:
host: argo-ui.kubeflow.svc.cluster.local
port:
number: 80
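# With this VirtualService applied, requests hitting the kubeflow-gateway under
# /argo/ are rewritten to / and routed to the argo-ui service on port 80. A
# sketch of checking it (the ingress hostname is deployment-specific):
#   curl http://<kubeflow-ingress-host>/argo/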
================================================
FILE: ch06/MLflow.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# mlflow-energyforecast\n",
"\n",
"This is a showcase for ML Flow capabilities, based on the article\n",
"http://the-odd-dataguy.com/be-more-efficient-to-produce-ml-models-with-mlflow\n",
"and a github https://github.com/jeanmidevacc/mlflow-energyforecast\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pandas\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/12/d1/a6502c2f5c15b50f5dd579fc1c52b47edf6f2e9f682aed917dd7565b3e60/pandas-1.0.0-cp36-cp36m-manylinux1_x86_64.whl (10.1MB)\n",
"\u001b[K |████████████████████████████████| 10.1MB 3.2MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: numpy>=1.13.3 in ./.local/lib/python3.6/site-packages (from pandas) (1.18.1)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.0)\n",
"Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2019.2)\n",
"Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.6.1->pandas) (1.11.0)\n",
"Installing collected packages: pandas\n",
" Found existing installation: pandas 0.25.3\n",
" Uninstalling pandas-0.25.3:\n",
" Successfully uninstalled pandas-0.25.3\n",
"Successfully installed pandas-1.0.0\n",
"\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"Collecting mlflow\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/65/33/5fe1559f7eb95e1fa2077df747ada7fd225045bad4e76bcdb53605e4b937/mlflow-1.6.0.tar.gz (15.9MB)\n",
"\u001b[K |████████████████████████████████| 15.9MB 3.0MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: alembic in ./.local/lib/python3.6/site-packages (from mlflow) (1.3.2)\n",
"Requirement already satisfied, skipping upgrade: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow) (7.0)\n",
"Requirement already satisfied, skipping upgrade: cloudpickle in ./.local/lib/python3.6/site-packages (from mlflow) (1.1.1)\n",
"Requirement already satisfied, skipping upgrade: databricks-cli>=0.8.7 in ./.local/lib/python3.6/site-packages (from mlflow) (0.9.1)\n",
"Requirement already satisfied, skipping upgrade: requests>=2.17.3 in /usr/local/lib/python3.6/dist-packages (from mlflow) (2.22.0)\n",
"Requirement already satisfied, skipping upgrade: six>=1.10.0 in /usr/lib/python3/dist-packages (from mlflow) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: Flask in ./.local/lib/python3.6/site-packages (from mlflow) (1.1.1)\n",
"Requirement already satisfied, skipping upgrade: numpy in ./.local/lib/python3.6/site-packages (from mlflow) (1.18.1)\n",
"Requirement already satisfied, skipping upgrade: pandas in ./.local/lib/python3.6/site-packages (from mlflow) (1.0.0)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow) (2.8.0)\n",
"Requirement already satisfied, skipping upgrade: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow) (3.8.0)\n",
"Requirement already satisfied, skipping upgrade: gitpython>=2.1.0 in ./.local/lib/python3.6/site-packages (from mlflow) (3.0.5)\n",
"Requirement already satisfied, skipping upgrade: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow) (5.1.2)\n",
"Requirement already satisfied, skipping upgrade: querystring_parser in ./.local/lib/python3.6/site-packages (from mlflow) (1.2.4)\n",
"Requirement already satisfied, skipping upgrade: simplejson in ./.local/lib/python3.6/site-packages (from mlflow) (3.17.0)\n",
"Requirement already satisfied, skipping upgrade: docker>=4.0.0 in /usr/local/lib/python3.6/dist-packages (from mlflow) (4.0.2)\n",
"Requirement already satisfied, skipping upgrade: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow) (0.3)\n",
"Requirement already satisfied, skipping upgrade: sqlparse in ./.local/lib/python3.6/site-packages (from mlflow) (0.3.0)\n",
"Requirement already satisfied, skipping upgrade: sqlalchemy in ./.local/lib/python3.6/site-packages (from mlflow) (1.3.12)\n",
"Requirement already satisfied, skipping upgrade: gorilla in ./.local/lib/python3.6/site-packages (from mlflow) (0.3.0)\n",
"Requirement already satisfied, skipping upgrade: prometheus-flask-exporter in ./.local/lib/python3.6/site-packages (from mlflow) (0.12.1)\n",
"Requirement already satisfied, skipping upgrade: gunicorn in ./.local/lib/python3.6/site-packages (from mlflow) (20.0.4)\n",
"Requirement already satisfied, skipping upgrade: Mako in ./.local/lib/python3.6/site-packages (from alembic->mlflow) (1.1.0)\n",
"Requirement already satisfied, skipping upgrade: python-editor>=0.3 in ./.local/lib/python3.6/site-packages (from alembic->mlflow) (1.0.4)\n",
"Requirement already satisfied, skipping upgrade: configparser>=0.3.5 in ./.local/lib/python3.6/site-packages (from databricks-cli>=0.8.7->mlflow) (4.0.2)\n",
"Requirement already satisfied, skipping upgrade: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.7->mlflow) (0.8.3)\n",
"Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.17.3->mlflow) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.17.3->mlflow) (2019.9.11)\n",
"Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.17.3->mlflow) (1.24.3)\n",
"Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests>=2.17.3->mlflow) (2.6)\n",
"Requirement already satisfied, skipping upgrade: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from Flask->mlflow) (2.10.1)\n",
"Requirement already satisfied, skipping upgrade: itsdangerous>=0.24 in ./.local/lib/python3.6/site-packages (from Flask->mlflow) (1.1.0)\n",
"Requirement already satisfied, skipping upgrade: Werkzeug>=0.15 in /usr/local/lib/python3.6/dist-packages (from Flask->mlflow) (0.15.4)\n",
"Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->mlflow) (2019.2)\n",
"Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.0->mlflow) (41.0.1)\n",
"Requirement already satisfied, skipping upgrade: gitdb2>=2.0.0 in ./.local/lib/python3.6/site-packages (from gitpython>=2.1.0->mlflow) (2.0.6)\n",
"Requirement already satisfied, skipping upgrade: websocket-client>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from docker>=4.0.0->mlflow) (0.56.0)\n",
"Requirement already satisfied, skipping upgrade: prometheus-client in /usr/local/lib/python3.6/dist-packages (from prometheus-flask-exporter->mlflow) (0.7.1)\n",
"Requirement already satisfied, skipping upgrade: MarkupSafe>=0.9.2 in /usr/local/lib/python3.6/dist-packages (from Mako->alembic->mlflow) (1.1.1)\n",
"Requirement already satisfied, skipping upgrade: smmap2>=2.0.0 in ./.local/lib/python3.6/site-packages (from gitdb2>=2.0.0->gitpython>=2.1.0->mlflow) (2.0.5)\n",
"Building wheels for collected packages: mlflow\n",
" Building wheel for mlflow (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Stored in directory: /home/jovyan/.cache/pip/wheels/46/4e/83/e58b14b6d2d494783e31690de9572c5777882f675f480374b6\n",
"Successfully built mlflow\n",
"Installing collected packages: mlflow\n",
" Found existing installation: mlflow 1.5.0\n",
" Uninstalling mlflow-1.5.0:\n",
" Successfully uninstalled mlflow-1.5.0\n",
"\u001b[33m WARNING: The script mlflow is installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
"Successfully installed mlflow-1.6.0\n",
"\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"Requirement already up-to-date: joblib in ./.local/lib/python3.6/site-packages (0.14.1)\n",
"\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"Requirement already up-to-date: numpy in ./.local/lib/python3.6/site-packages (1.18.1)\n",
"\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"Requirement already up-to-date: scipy in ./.local/lib/python3.6/site-packages (1.4.1)\n",
"Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in ./.local/lib/python3.6/site-packages (from scipy) (1.18.1)\n",
"\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"Requirement already up-to-date: scikit-learn in ./.local/lib/python3.6/site-packages (0.22.1)\n",
"Requirement already satisfied, skipping upgrade: numpy>=1.11.0 in ./.local/lib/python3.6/site-packages (from scikit-learn) (1.18.1)\n",
"Requirement already satisfied, skipping upgrade: scipy>=0.17.0 in ./.local/lib/python3.6/site-packages (from scikit-learn) (1.4.1)\n",
"Requirement already satisfied, skipping upgrade: joblib>=0.11 in ./.local/lib/python3.6/site-packages (from scikit-learn) (0.14.1)\n",
"\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"Collecting boto3\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d5/57/e9675a5a8d0ee586594ff19cb9a601334fbf24fa2fb29052d2a900ee5d23/boto3-1.11.9-py2.py3-none-any.whl (128kB)\n",
"\u001b[K |████████████████████████████████| 133kB 3.5MB/s eta 0:00:01\n",
"\u001b[?25hCollecting botocore<1.15.0,>=1.14.9 (from boto3)\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/64/4c/b0b0d3b6f84a05f9135051b56d3eb8708012a289c4b82ee21c8c766f47b5/botocore-1.14.9-py2.py3-none-any.whl (5.9MB)\n",
"\u001b[K |████████████████████████████████| 5.9MB 11.6MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: jmespath<1.0.0,>=0.7.1 in ./.local/lib/python3.6/site-packages (from boto3) (0.9.4)\n",
"Requirement already satisfied, skipping upgrade: s3transfer<0.4.0,>=0.3.0 in ./.local/lib/python3.6/site-packages (from boto3) (0.3.0)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.6/dist-packages (from botocore<1.15.0,>=1.14.9->boto3) (2.8.0)\n",
"Requirement already satisfied, skipping upgrade: docutils<0.16,>=0.10 in ./.local/lib/python3.6/site-packages (from botocore<1.15.0,>=1.14.9->boto3) (0.15.2)\n",
"Requirement already satisfied, skipping upgrade: urllib3<1.26,>=1.20 in /usr/local/lib/python3.6/dist-packages (from botocore<1.15.0,>=1.14.9->boto3) (1.24.3)\n",
"Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.15.0,>=1.14.9->boto3) (1.11.0)\n",
"Installing collected packages: botocore, boto3\n",
" Found existing installation: botocore 1.14.4\n",
" Uninstalling botocore-1.14.4:\n",
" Successfully uninstalled botocore-1.14.4\n",
" Found existing installation: boto3 1.11.4\n",
" Uninstalling boto3-1.11.4:\n",
" Successfully uninstalled boto3-1.11.4\n",
"Successfully installed boto3-1.11.9 botocore-1.14.9\n",
"\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
"source": [
"!pip install pandas --upgrade --user\n",
"!pip install mlflow --upgrade --user\n",
"!pip install joblib --upgrade --user\n",
"!pip install numpy --upgrade --user \n",
"!pip install scipy --upgrade --user \n",
"!pip install scikit-learn --upgrade --user\n",
"!pip install boto3 --upgrade --user"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"import json\n",
"import os\n",
"from joblib import Parallel, delayed\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy\n",
"\n",
"from sklearn.model_selection import train_test_split, KFold\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score\n",
"from sklearn.exceptions import ConvergenceWarning\n",
"\n",
"import mlflow\n",
"import mlflow.sklearn\n",
"from mlflow.tracking import MlflowClient\n",
"\n",
"from warnings import simplefilter\n",
"simplefilter(action='ignore', category = FutureWarning)\n",
"simplefilter(action='ignore', category = ConvergenceWarning)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Ensure Minio access\n",
"os.environ['MLFLOW_S3_ENDPOINT_URL'] = 'http://minio-service.kubeflow.svc.cluster.local:9000'\n",
"os.environ['AWS_ACCESS_KEY_ID'] = 'minio'\n",
"os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio123'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data preparation"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Collect the data \n",
"df_nationalconsumption_electricity_daily = pd.read_csv(\"https://raw.githubusercontent.com/jeanmidevacc/mlflow-energyforecast/master/data/rtu_data.csv\")\n",
"df_nationalconsumption_electricity_daily.set_index([\"day\"], inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Size of the training set : 1081\n",
"Size of the testing set : 233\n"
]
}
],
"source": [
"# Prepare the training set and the testing set\n",
"df_trainvalidate_energyconsumption = df_nationalconsumption_electricity_daily[df_nationalconsumption_electricity_daily[\"datastatus\"] == \"Définitif\"]\n",
"del df_trainvalidate_energyconsumption[\"datastatus\"]\n",
"\n",
"df_test_energyconsumption = df_nationalconsumption_electricity_daily[df_nationalconsumption_electricity_daily[\"datastatus\"] == \"Consolidé\"]\n",
"del df_test_energyconsumption[\"datastatus\"]\n",
"\n",
"print(\"Size of the training set : \",len(df_trainvalidate_energyconsumption))\n",
"print(\"Size of the testing set : \",len(df_test_energyconsumption))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Output to predict : dailyconsumption\n",
"Inputs for the prediction : ['weekday', 'week', 'month', 'year', 'avg_min_temperature', 'avg_max_temperature', 'avg_mean_temperature', 'wavg_min_temperature', 'wavg_max_temperature', 'wavg_mean_temperature', 'is_holiday']\n"
]
}
],
"source": [
"# Define the inputs and the output\n",
"output = \"dailyconsumption\"\n",
"allinputs = list(df_trainvalidate_energyconsumption.columns)\n",
"allinputs.remove(output)\n",
"\n",
"print(\"Output to predict : \", output)\n",
"print(\"Inputs for the prediction : \", allinputs)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Build different set of featurws for the model\n",
"possible_inputs = {\n",
" \"all\" : allinputs,\n",
" \"only_allday_inputs\" : [\"weekday\", \"month\", \"is_holiday\", \"week\"],\n",
" \"only_allweatheravg_inputs\" : [\"avg_min_temperature\", \"avg_max_temperature\", \"avg_mean_temperature\",\"wavg_min_temperature\", \"wavg_max_temperature\", \"wavg_mean_temperature\"],\n",
" \"only_meanweather_inputs_avg\" : [\"avg_mean_temperature\"],\n",
" \"only_meanweather_inputs_wavg\" : [\"wavg_mean_temperature\"],\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Prepare the output of the model\n",
"array_output_train = np.array(df_trainvalidate_energyconsumption[output])\n",
"array_output_test = np.array(df_test_energyconsumption[output])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# connect to remote server\n",
"remote_server_uri = \"http://mlflowserver.kubeflow.svc.cluster.local:5000\"\n",
"mlflow.set_tracking_uri(remote_server_uri)\n",
"# Launch the experiment on mlflow\n",
"experiment_name = \"electricityconsumption-forecast\"\n",
"mlflow.set_experiment(experiment_name)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Define the evaluation function that will do the computation of the different metrics of accuracy (RMSE,MAE,R2)\n",
"def evaluation_model(y_test, y_pred):\n",
"\n",
" rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
" mae = mean_absolute_error(y_test, y_pred)\n",
" r2 = r2_score(y_test, y_pred)\n",
"\n",
" metrics = {\n",
" \"rmse\" : rmse,\n",
" \"r2\" : r2,\n",
" \"mae\" : mae,\n",
" }\n",
" \n",
" return metrics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# KNN regressor"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsRegressor\n",
"\n",
"def train_knnmodel(parameters, inputs, tags, log = False):\n",
" with mlflow.start_run(nested = True):\n",
" \n",
" # Prepare the data\n",
" array_inputs_train = np.array(df_trainvalidate_energyconsumption[inputs])\n",
" array_inputs_test = np.array(df_test_energyconsumption[inputs])\n",
" \n",
" \n",
" # Build the model\n",
" tic = time.time()\n",
" model = KNeighborsRegressor(parameters[\"nbr_neighbors\"], weights = parameters[\"weight_method\"])\n",
" model.fit(array_inputs_train, array_output_train)\n",
" duration_training = time.time() - tic\n",
"\n",
" # Make the prediction\n",
" tic1 = time.time()\n",
" prediction = model.predict(array_inputs_test)\n",
" duration_prediction = time.time() - tic1\n",
"\n",
" # Evaluate the model prediction\n",
" metrics = evaluation_model(array_output_test, prediction)\n",
"\n",
" # Log in the console\n",
" if log:\n",
" print(f\"KNN regressor:\")\n",
" print(parameters)\n",
" print(metrics)\n",
"\n",
" # Log in mlflow (parameter)\n",
" mlflow.log_params(parameters)\n",
"\n",
" # Log in mlflow (metrics)\n",
" metrics[\"duration_training\"] = duration_training\n",
" metrics[\"duration_prediction\"] = duration_prediction\n",
" mlflow.log_metrics(metrics)\n",
"\n",
" # log in mlflow (model)\n",
" mlflow.sklearn.log_model(model, f\"model\")\n",
" \n",
" # Tag the model\n",
" mlflow.set_tags(tags)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Test the different combinations\n",
"configurations = []\n",
"for nbr_neighbors in [1,2,5,10]:\n",
" for weight_method in ['uniform','distance']:\n",
" for field in possible_inputs:\n",
" parameters = {\n",
" \"nbr_neighbors\" : nbr_neighbors,\n",
" \"weight_method\" : weight_method\n",
" }\n",
"\n",
" tags = {\n",
" \"model\" : \"knn\",\n",
" \"inputs\" : field\n",
" }\n",
" \n",
" configurations.append([parameters, tags])\n",
"\n",
" train_knnmodel(parameters, possible_inputs[field], tags)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# MLP regressor"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.neural_network import MLPRegressor\n",
"\n",
"def train_mlpmodel(parameters, inputs, tags, log = False):\n",
" with mlflow.start_run(nested = True):\n",
" \n",
" # Prepare the data\n",
" array_inputs_train = np.array(df_trainvalidate_energyconsumption[inputs])\n",
" array_inputs_test = np.array(df_test_energyconsumption[inputs])\n",
" \n",
" # Build the model\n",
" tic = time.time()\n",
"\n",
" model = MLPRegressor(\n",
" hidden_layer_sizes = parameters[\"hidden_layers\"],\n",
" activation = parameters[\"activation\"],\n",
" solver = parameters[\"solver\"],\n",
" max_iter = parameters[\"nbr_iteration\"],\n",
" random_state = 0)\n",
" \n",
" model.fit(array_inputs_train, array_output_train)\n",
" duration_training = time.time() - tic\n",
"\n",
" # Make the prediction\n",
" tic1 = time.time()\n",
" prediction = model.predict(array_inputs_test)\n",
" duration_prediction = time.time() - tic1\n",
"\n",
" # Evaluate the model prediction\n",
" metrics = evaluation_model(array_output_test, prediction)\n",
"\n",
" # Log in the console\n",
" if log:\n",
" print(f\"Random forest regressor:\")\n",
" print(parameters)\n",
" print(metrics)\n",
" \n",
" # Log in mlflow (parameter)\n",
" mlflow.log_params(parameters)\n",
"\n",
" # Log in mlflow (metrics)\n",
" metrics[\"duration_training\"] = duration_training\n",
" metrics[\"duration_prediction\"] = duration_prediction\n",
" mlflow.log_metrics(metrics)\n",
"\n",
" # log in mlflow (model)\n",
" mlflow.sklearn.log_model(model, f\"model\")\n",
" \n",
" # Tag the model\n",
" mlflow.set_tags(tags) "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"for hiddenlayers in [4,8,16]:\n",
" for activation in [\"identity\",\"logistic\",]:\n",
" for solver in [\"lbfgs\"]:\n",
" for nbriteration in [10,100,1000]:\n",
" for field in possible_inputs:\n",
" parameters = {\n",
" \"hidden_layers\" : hiddenlayers,\n",
" \"activation\" : activation,\n",
" \"solver\" : solver,\n",
" \"nbr_iteration\" : nbriteration\n",
" }\n",
"\n",
" tags = {\n",
" \"model\" : \"mlp\",\n",
" \"inputs\" : field\n",
" }\n",
"\n",
" train_mlpmodel(parameters, possible_inputs[field], tags)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Use a handmade model (scipy approach)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"class PTG:\n",
" def __init__(self, thresholds_x0, thresholds_a, thresholds_b):\n",
" self.thresholds_x0 = thresholds_x0\n",
" self.thresholds_a = thresholds_a\n",
" self.thresholds_b = thresholds_b\n",
" \n",
" def get_ptgmodel(self, x, a, b, x0):\n",
" return np.piecewise(x, [x < x0, x >= x0], [lambda x: a*x + b , lambda x : a*x0 + b])\n",
" \n",
" def fit(self, dfx, y):\n",
" x = np.array(dfx)\n",
" \n",
" # Define the bounds\n",
" bounds_min = [thresholds_a[0], thresholds_b[0], thresholds_x0[0]]\n",
" bounds_max = [thresholds_a[1], thresholds_b[1], thresholds_x0[1]]\n",
" bounds = (bounds_min, bounds_max)\n",
"\n",
" # Fit a model\n",
" popt, pcov = scipy.optimize.curve_fit(self.get_ptgmodel, x, y, bounds = bounds)\n",
"\n",
" # Get the parameter of the model\n",
" a = popt[0]\n",
" b = popt[1]\n",
" x0 = popt[2]\n",
" \n",
" self.coefficients = [a, b, x0]\n",
" \n",
" def predict(self,dfx):\n",
" x = np.array(dfx)\n",
" predictions = []\n",
" for elt in x:\n",
" forecast = self.get_ptgmodel(elt, self.coefficients[0], self.coefficients[1], self.coefficients[2])\n",
" predictions.append(forecast)\n",
" return np.array(predictions)\n",
" \n",
"def train_ptgmodel(parameters, inputs, tags, log = False):\n",
" with mlflow.start_run(nested = True):\n",
" \n",
" # Prepare the data\n",
" df_inputs_train = df_trainvalidate_energyconsumption[inputs[0]]\n",
" df_inputs_test = df_test_energyconsumption[inputs[0]]\n",
" \n",
" \n",
" # Build the model\n",
" tic = time.time()\n",
" \n",
" model = PTG(parameters[\"thresholds_x0\"], parameters[\"thresholds_a\"], parameters[\"thresholds_b\"])\n",
" \n",
" model.fit(df_inputs_train, array_output_train)\n",
" duration_training = time.time() - tic\n",
"\n",
" # Make the prediction\n",
" tic1 = time.time()\n",
" prediction = model.predict(df_inputs_test)\n",
" duration_prediction = time.time() - tic1\n",
"\n",
" # Evaluate the model prediction\n",
" metrics = evaluation_model(array_output_test, prediction)\n",
"\n",
" # Log in the console\n",
" if log:\n",
" print(f\"PTG:\")\n",
" print(parameters)\n",
" print(metrics)\n",
" \n",
" # Log in mlflow (parameter)\n",
" mlflow.log_params(parameters) \n",
"\n",
" # Log in mlflow (metrics)\n",
" metrics[\"duration_training\"] = duration_training\n",
" metrics[\"duration_prediction\"] = duration_prediction\n",
" mlflow.log_metrics(metrics)\n",
"\n",
" # log in mlflow (model)\n",
" mlflow.sklearn.log_model(model, f\"model\")\n",
" \n",
" # Tag the model\n",
" mlflow.set_tags(tags) "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Define the parameters of the model\n",
"thresholds_x0 = [0, 20]\n",
"thresholds_a = [-200000, -50000]\n",
"thresholds_b = [1000000, 3000000]\n",
"\n",
"parameters = {\n",
" \"thresholds_x0\" : thresholds_x0,\n",
" \"thresholds_a\" : thresholds_a,\n",
" \"thresholds_b\" : thresholds_b\n",
"}\n",
"\n",
"for field in [\"only_meanweather_inputs_avg\", \"only_meanweather_inputs_wavg\"]:\n",
" \n",
" tags = {\n",
" \"model\" : \"ptg\",\n",
" \"inputs\" : field\n",
" }\n",
" \n",
" train_ptgmodel(parameters, possible_inputs[field], tags, log = False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluate mlflow results"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of runs done : 272\n"
]
}
],
"source": [
"# Select the run of the experiment\n",
"df_runs = mlflow.search_runs(experiment_ids=\"0\")\n",
"print(\"Number of runs done : \", len(df_runs))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>run_id</th>\n",
" <th>experiment_id</th>\n",
" <th>status</th>\n",
" <th>artifact_uri</th>\n",
" <th>start_time</th>\n",
" <th>end_time</th>\n",
" <th>metrics.r2</th>\n",
" <th>metrics.mae</th>\n",
" <th>metrics.duration_prediction</th>\n",
" <th>metrics.rmse</th>\n",
" <th>...</th>\n",
" <th>params.activation</th>\n",
" <th>params.nbr_iteration</th>\n",
" <th>params.hidden_layers</th>\n",
" <th>params.nbr_neighbors</th>\n",
" <th>params.weight_method</th>\n",
" <th>tags.model</th>\n",
" <th>tags.mlflow.source.type</th>\n",
" <th>tags.inputs</th>\n",
" <th>tags.mlflow.user</th>\n",
" <th>tags.mlflow.source.name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>238</th>\n",
" <td>50ee6409ad3a4778bb9d8cb59034df5d</td>\n",
" <td>0</td>\n",
" <td>FINISHED</td>\n",
" <td>s3://mlflow/mlflow/artifacts/0/50ee6409ad3a477...</td>\n",
" <td>2020-01-17 18:17:47.448000+00:00</td>\n",
" <td>2020-01-17 18:17:47.929000+00:00</td>\n",
" <td>0.935956</td>\n",
" <td>104040.339809</td>\n",
" <td>0.003205</td>\n",
" <td>134649.399348</td>\n",
" <td>...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>5</td>\n",
" <td>distance</td>\n",
" <td>knn</td>\n",
" <td>LOCAL</td>\n",
" <td>all</td>\n",
" <td>jovyan</td>\n",
" <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>614bcf7042ca465c8d86296f12ac9c09</td>\n",
" <td>0</td>\n",
" <td>FINISHED</td>\n",
" <td>s3://mlflow/mlflow/artifacts/0/614bcf7042ca465...</td>\n",
" <td>2020-01-31 15:21:29.978000+00:00</td>\n",
" <td>2020-01-31 15:21:30.503000+00:00</td>\n",
" <td>0.935956</td>\n",
" <td>104040.339809</td>\n",
" <td>0.003404</td>\n",
" <td>134649.399348</td>\n",
" <td>...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>5</td>\n",
" <td>distance</td>\n",
" <td>knn</td>\n",
" <td>LOCAL</td>\n",
" <td>all</td>\n",
" <td>jovyan</td>\n",
" <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>b05667486f7d45779d23519eb0dbe24f</td>\n",
" <td>0</td>\n",
" <td>FINISHED</td>\n",
" <td>s3://mlflow/mlflow/artifacts/0/b05667486f7d457...</td>\n",
" <td>2020-01-31 15:21:35.424000+00:00</td>\n",
" <td>2020-01-31 15:21:35.922000+00:00</td>\n",
" <td>0.935111</td>\n",
" <td>105833.358681</td>\n",
" <td>0.002732</td>\n",
" <td>135534.759873</td>\n",
" <td>...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>10</td>\n",
" <td>distance</td>\n",
" <td>knn</td>\n",
" <td>LOCAL</td>\n",
" <td>all</td>\n",
" <td>jovyan</td>\n",
" <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>228</th>\n",
" <td>d279d728946e4b74811203a842d79df3</td>\n",
" <td>0</td>\n",
" <td>FINISHED</td>\n",
" <td>s3://mlflow/mlflow/artifacts/0/d279d728946e4b7...</td>\n",
" <td>2020-01-17 18:17:52.555000+00:00</td>\n",
" <td>2020-01-17 18:17:53.029000+00:00</td>\n",
" <td>0.935111</td>\n",
" <td>105833.358681</td>\n",
" <td>0.002863</td>\n",
" <td>135534.759873</td>\n",
" <td>...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>10</td>\n",
" <td>distance</td>\n",
" <td>knn</td>\n",
" <td>LOCAL</td>\n",
" <td>all</td>\n",
" <td>jovyan</td>\n",
" <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>88af21719e0a408b91448f7ddd27e84c</td>\n",
" <td>0</td>\n",
" <td>FINISHED</td>\n",
" <td>s3://mlflow/mlflow/artifacts/0/88af21719e0a408...</td>\n",
" <td>2020-01-31 15:21:27.338000+00:00</td>\n",
" <td>2020-01-31 15:21:27.947000+00:00</td>\n",
" <td>0.934465</td>\n",
" <td>105793.727897</td>\n",
" <td>0.002668</td>\n",
" <td>136207.422483</td>\n",
" <td>...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>5</td>\n",
" <td>uniform</td>\n",
" <td>knn</td>\n",
" <td>LOCAL</td>\n",
" <td>all</td>\n",
" <td>jovyan</td>\n",
" <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" run_id experiment_id status \\\n",
"238 50ee6409ad3a4778bb9d8cb59034df5d 0 FINISHED \n",
"106 614bcf7042ca465c8d86296f12ac9c09 0 FINISHED \n",
"96 b05667486f7d45779d23519eb0dbe24f 0 FINISHED \n",
"228 d279d728946e4b74811203a842d79df3 0 FINISHED \n",
"111 88af21719e0a408b91448f7ddd27e84c 0 FINISHED \n",
"\n",
" artifact_uri \\\n",
"238 s3://mlflow/mlflow/artifacts/0/50ee6409ad3a477... \n",
"106 s3://mlflow/mlflow/artifacts/0/614bcf7042ca465... \n",
"96 s3://mlflow/mlflow/artifacts/0/b05667486f7d457... \n",
"228 s3://mlflow/mlflow/artifacts/0/d279d728946e4b7... \n",
"111 s3://mlflow/mlflow/artifacts/0/88af21719e0a408... \n",
"\n",
" start_time end_time \\\n",
"238 2020-01-17 18:17:47.448000+00:00 2020-01-17 18:17:47.929000+00:00 \n",
"106 2020-01-31 15:21:29.978000+00:00 2020-01-31 15:21:30.503000+00:00 \n",
"96 2020-01-31 15:21:35.424000+00:00 2020-01-31 15:21:35.922000+00:00 \n",
"228 2020-01-17 18:17:52.555000+00:00 2020-01-17 18:17:53.029000+00:00 \n",
"111 2020-01-31 15:21:27.338000+00:00 2020-01-31 15:21:27.947000+00:00 \n",
"\n",
" metrics.r2 metrics.mae metrics.duration_prediction metrics.rmse \\\n",
"238 0.935956 104040.339809 0.003205 134649.399348 \n",
"106 0.935956 104040.339809 0.003404 134649.399348 \n",
"96 0.935111 105833.358681 0.002732 135534.759873 \n",
"228 0.935111 105833.358681 0.002863 135534.759873 \n",
"111 0.934465 105793.727897 0.002668 136207.422483 \n",
"\n",
" ... params.activation params.nbr_iteration params.hidden_layers \\\n",
"238 ... None None None \n",
"106 ... None None None \n",
"96 ... None None None \n",
"228 ... None None None \n",
"111 ... None None None \n",
"\n",
" params.nbr_neighbors params.weight_method tags.model \\\n",
"238 5 distance knn \n",
"106 5 distance knn \n",
"96 10 distance knn \n",
"228 10 distance knn \n",
"111 5 uniform knn \n",
"\n",
" tags.mlflow.source.type tags.inputs tags.mlflow.user \\\n",
"238 LOCAL all jovyan \n",
"106 LOCAL all jovyan \n",
"96 LOCAL all jovyan \n",
"228 LOCAL all jovyan \n",
"111 LOCAL all jovyan \n",
"\n",
" tags.mlflow.source.name \n",
"238 /usr/local/lib/python3.6/dist-packages/ipykern... \n",
"106 /usr/local/lib/python3.6/dist-packages/ipykern... \n",
"96 /usr/local/lib/python3.6/dist-packages/ipykern... \n",
"228 /usr/local/lib/python3.6/dist-packages/ipykern... \n",
"111 /usr/local/lib/python3.6/dist-packages/ipykern... \n",
"\n",
"[5 rows x 25 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Quick sorting to get the best models based on the RMSE metric\n",
"df_runs.sort_values([\"metrics.rmse\"], ascending = True, inplace = True)\n",
"df_runs.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'50ee6409ad3a4778bb9d8cb59034df5d'"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get the best one\n",
"runid_selected = df_runs.head(1)[\"run_id\"].values[0]\n",
"runid_selected"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Raw Cell Format",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: ch06/MLflow.py
================================================
#!/usr/bin/env python
# coding: utf-8
# # mlflow-energyforecast
#
# This is a showcase of MLflow capabilities, based on the article
# http://the-odd-dataguy.com/be-more-efficient-to-produce-ml-models-with-mlflow
# and the GitHub repository https://github.com/jeanmidevacc/mlflow-energyforecast
#
# In[2]:
get_ipython().system('pip install pandas --upgrade --user')
get_ipython().system('pip install mlflow --upgrade --user')
get_ipython().system('pip install joblib --upgrade --user')
get_ipython().system('pip install numpy --upgrade --user ')
get_ipython().system('pip install scipy --upgrade --user ')
get_ipython().system('pip install scikit-learn --upgrade --user')
get_ipython().system('pip install boto3 --upgrade --user')
# In[3]:
import time
import json
import os
from joblib import Parallel, delayed
import pandas as pd
import numpy as np
import scipy
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from sklearn.exceptions import ConvergenceWarning
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=ConvergenceWarning)
# In[4]:
# Ensure Minio access
os.environ[
'MLFLOW_S3_ENDPOINT_URL'] = 'http://minio-service.kubeflow.svc.cluster.local:9000'
os.environ['AWS_ACCESS_KEY_ID'] = 'minio'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio123'
# # Data preparation
# In[5]:
# Collect the data
df_nationalconsumption_electricity_daily = pd.read_csv(
"https://raw.githubusercontent.com/jeanmidevacc/mlflow-energyforecast/master/data/rtu_data.csv"
)
df_nationalconsumption_electricity_daily.set_index(["day"], inplace=True)
# In[6]:
# Prepare the training set and the testing set
df_trainvalidate_energyconsumption = df_nationalconsumption_electricity_daily[
df_nationalconsumption_electricity_daily["datastatus"] == "Définitif"]
del df_trainvalidate_energyconsumption["datastatus"]
df_test_energyconsumption = df_nationalconsumption_electricity_daily[
df_nationalconsumption_electricity_daily["datastatus"] == "Consolidé"]
del df_test_energyconsumption["datastatus"]
print("Size of the training set : ", len(df_trainvalidate_energyconsumption))
print("Size of the testing set : ", len(df_test_energyconsumption))
# In[7]:
# Define the inputs and the output
output = "dailyconsumption"
allinputs = list(df_trainvalidate_energyconsumption.columns)
allinputs.remove(output)
print("Output to predict : ", output)
print("Inputs for the prediction : ", allinputs)
# In[8]:
# Build different sets of features for the model
possible_inputs = {
"all":
allinputs,
"only_allday_inputs": ["weekday", "month", "is_holiday", "week"],
"only_allweatheravg_inputs": [
"avg_min_temperature", "avg_max_temperature", "avg_mean_temperature",
"wavg_min_temperature", "wavg_max_temperature", "wavg_mean_temperature"
],
"only_meanweather_inputs_avg": ["avg_mean_temperature"],
"only_meanweather_inputs_wavg": ["wavg_mean_temperature"],
}
# In[9]:
# Prepare the output of the model
array_output_train = np.array(df_trainvalidate_energyconsumption[output])
array_output_test = np.array(df_test_energyconsumption[output])
# In[10]:
# connect to remote server
remote_server_uri = "http://mlflowserver.kubeflow.svc.cluster.local:5000"
mlflow.set_tracking_uri(remote_server_uri)
# Launch the experiment on mlflow
experiment_name = "electricityconsumption-forecast"
mlflow.set_experiment(experiment_name)
# In[11]:
# Define the evaluation function that computes the different accuracy metrics (RMSE, MAE, R2)
def evaluation_model(y_test, y_pred):
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
metrics = {
"rmse": rmse,
"r2": r2,
"mae": mae,
}
return metrics
# # KNN regressor
# In[12]:
from sklearn.neighbors import KNeighborsRegressor
def train_knnmodel(parameters, inputs, tags, log=False):
with mlflow.start_run(nested=True):
# Prepare the data
array_inputs_train = np.array(
df_trainvalidate_energyconsumption[inputs])
array_inputs_test = np.array(df_test_energyconsumption[inputs])
# Build the model
tic = time.time()
model = KNeighborsRegressor(parameters["nbr_neighbors"],
weights=parameters["weight_method"])
model.fit(array_inputs_train, array_output_train)
duration_training = time.time() - tic
# Make the prediction
tic1 = time.time()
prediction = model.predict(array_inputs_test)
duration_prediction = time.time() - tic1
# Evaluate the model prediction
metrics = evaluation_model(array_output_test, prediction)
# Log in the console
if log:
print(f"KNN regressor:")
print(parameters)
print(metrics)
# Log in mlflow (parameter)
mlflow.log_params(parameters)
# Log in mlflow (metrics)
metrics["duration_training"] = duration_training
metrics["duration_prediction"] = duration_prediction
mlflow.log_metrics(metrics)
# log in mlflow (model)
mlflow.sklearn.log_model(model, "model")
# Tag the model
mlflow.set_tags(tags)
# In[13]:
# Test the different combinations
configurations = []
for nbr_neighbors in [1, 2, 5, 10]:
for weight_method in ['uniform', 'distance']:
for field in possible_inputs:
parameters = {
"nbr_neighbors": nbr_neighbors,
"weight_method": weight_method
}
tags = {"model": "knn", "inputs": field}
configurations.append([parameters, tags])
train_knnmodel(parameters, possible_inputs[field], tags)
# # MLP regressor
# In[14]:
from sklearn.neural_network import MLPRegressor
def train_mlpmodel(parameters, inputs, tags, log=False):
with mlflow.start_run(nested=True):
# Prepare the data
array_inputs_train = np.array(
df_trainvalidate_energyconsumption[inputs])
array_inputs_test = np.array(df_test_energyconsumption[inputs])
# Build the model
tic = time.time()
model = MLPRegressor(hidden_layer_sizes=parameters["hidden_layers"],
activation=parameters["activation"],
solver=parameters["solver"],
max_iter=parameters["nbr_iteration"],
random_state=0)
model.fit(array_inputs_train, array_output_train)
duration_training = time.time() - tic
# Make the prediction
tic1 = time.time()
prediction = model.predict(array_inputs_test)
duration_prediction = time.time() - tic1
# Evaluate the model prediction
metrics = evaluation_model(array_output_test, prediction)
# Log in the console
if log:
print(f"Random forest regressor:")
print(parameters)
print(metrics)
# Log in mlflow (parameter)
mlflow.log_params(parameters)
# Log in mlflow (metrics)
metrics["duration_training"] = duration_training
metrics["duration_prediction"] = duration_prediction
mlflow.log_metrics(metrics)
# log in mlflow (model)
mlflow.sklearn.log_model(model, "model")
# Tag the model
mlflow.set_tags(tags)
# In[15]:
for hiddenlayers in [4, 8, 16]:
for activation in [
"identity",
"logistic",
]:
for solver in ["lbfgs"]:
for nbriteration in [10, 100, 1000]:
for field in possible_inputs:
parameters = {
"hidden_layers": hiddenlayers,
"activation": activation,
"solver": solver,
"nbr_iteration": nbriteration
}
tags = {"model": "mlp", "inputs": field}
train_mlpmodel(parameters, possible_inputs[field], tags)
# # Use a handmade model (scipy approach)
# In[16]:
class PTG:
def __init__(self, thresholds_x0, thresholds_a, thresholds_b):
self.thresholds_x0 = thresholds_x0
self.thresholds_a = thresholds_a
self.thresholds_b = thresholds_b
def get_ptgmodel(self, x, a, b, x0):
return np.piecewise(x, [x < x0, x >= x0],
[lambda x: a * x + b, lambda x: a * x0 + b])
def fit(self, dfx, y):
x = np.array(dfx)
# Define the bounds
bounds_min = [self.thresholds_a[0], self.thresholds_b[0], self.thresholds_x0[0]]
bounds_max = [self.thresholds_a[1], self.thresholds_b[1], self.thresholds_x0[1]]
bounds = (bounds_min, bounds_max)
# Fit a model
popt, pcov = scipy.optimize.curve_fit(self.get_ptgmodel,
x,
y,
bounds=bounds)
# Get the parameter of the model
a = popt[0]
b = popt[1]
x0 = popt[2]
self.coefficients = [a, b, x0]
def predict(self, dfx):
x = np.array(dfx)
predictions = []
for elt in x:
forecast = self.get_ptgmodel(elt, self.coefficients[0],
self.coefficients[1],
self.coefficients[2])
predictions.append(forecast)
return np.array(predictions)
def train_ptgmodel(parameters, inputs, tags, log=False):
with mlflow.start_run(nested=True):
# Prepare the data
df_inputs_train = df_trainvalidate_energyconsumption[inputs[0]]
df_inputs_test = df_test_energyconsumption[inputs[0]]
# Build the model
tic = time.time()
model = PTG(parameters["thresholds_x0"], parameters["thresholds_a"],
parameters["thresholds_b"])
model.fit(df_inputs_train, array_output_train)
duration_training = time.time() - tic
# Make the prediction
tic1 = time.time()
prediction = model.predict(df_inputs_test)
duration_prediction = time.time() - tic1
# Evaluate the model prediction
metrics = evaluation_model(array_output_test, prediction)
# Log in the console
if log:
print(f"PTG:")
print(parameters)
print(metrics)
# Log in mlflow (parameter)
mlflow.log_params(parameters)
# Log in mlflow (metrics)
metrics["duration_training"] = duration_training
metrics["duration_prediction"] = duration_prediction
mlflow.log_metrics(metrics)
# log in mlflow (model)
mlflow.sklearn.log_model(model, "model")
# Tag the model
mlflow.set_tags(tags)
# In[17]:
# Define the parameters of the model
thresholds_x0 = [0, 20]
thresholds_a = [-200000, -50000]
thresholds_b = [1000000, 3000000]
parameters = {
"thresholds_x0": thresholds_x0,
"thresholds_a": thresholds_a,
"thresholds_b": thresholds_b
}
for field in ["only_meanweather_inputs_avg", "only_meanweather_inputs_wavg"]:
tags = {"model": "ptg", "inputs": field}
train_ptgmodel(parameters, possible_inputs[field], tags, log=False)
# # Evaluate MLflow results
# In[18]:
# Select the runs of the experiment
df_runs = mlflow.search_runs(experiment_ids="0")
print("Number of runs done : ", len(df_runs))
# In[19]:
# Quick sorting to get the best models based on the RMSE metric
df_runs.sort_values(["metrics.rmse"], ascending=True, inplace=True)
df_runs.head()
# In[20]:
# Get the best one
runid_selected = df_runs.head(1)["run_id"].values[0]
runid_selected
# In[ ]:
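# A minimal sketch of loading the selected model back for scoring -- assuming
# MLflow 1.x's "runs:/<run_id>/<artifact_path>" URI scheme and the "model"
# artifact path used by the log_model calls above; the "all" feature set is an
# illustrative choice and must match the inputs the selected run was trained on:
#
# best_model = mlflow.sklearn.load_model(f"runs:/{runid_selected}/model")
# X_test = np.array(df_test_energyconsumption[possible_inputs["all"]])
# print(best_model.predict(X_test)[:5])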
================================================
FILE: ch06/Metadata.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Installation and imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already up-to-date: kfmd in ./.local/lib/python3.6/site-packages (0.1.8)\n",
"Requirement already up-to-date: pandas in ./.local/lib/python3.6/site-packages (1.0.1)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.18.1)\n",
"Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2019.3)\n",
"Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.6.1->pandas) (1.11.0)\n"
]
}
],
"source": [
"!pip install kfmd --upgrade --user\n",
"!pip install pandas --upgrade --user\n",
"\n",
"from kfmd import metadata\n",
"import pandas\n",
"from datetime import datetime\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a workspace, run and execution"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"ws1 = metadata.Workspace(\n",
" # Connect to metadata-service in namesapce kubeflow in k8s cluster.\n",
" backend_url_prefix=\"metadata-service.kubeflow.svc.cluster.local:8080\",\n",
" name=\"ws1\",\n",
" description=\"a workspace for testing\",\n",
" labels={\"n1\": \"v1\"})\n",
"r = metadata.Run(\n",
" workspace=ws1,\n",
" name=\"run-\" + datetime.utcnow().isoformat(\"T\") ,\n",
" description=\"a run in ws_1\",\n",
")\n",
"exec = metadata.Execution(\n",
" name = \"execution\" + datetime.utcnow().isoformat(\"T\") ,\n",
" workspace=ws1,\n",
" run=r,\n",
" description=\"execution example\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Log data set, model and its evaluation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data_set = exec.log_input(\n",
" metadata.DataSet(\n",
" description=\"an example data\",\n",
" name=\"mytable-dump\",\n",
" owner=\"owner@my-company.org\",\n",
" uri=\"file://path/to/dataset\",\n",
" version=\"v1.0.0\",\n",
" query=\"SELECT * FROM mytable\"))\n",
"model = exec.log_output(\n",
" metadata.Model(\n",
" name=\"MNIST\",\n",
" description=\"model to recognize handwritten digits\",\n",
" owner=\"someone@kubeflow.org\",\n",
" uri=\"gcs://my-bucket/mnist\",\n",
" model_type=\"neural network\",\n",
" training_framework={\n",
" \"name\": \"tensorflow\",\n",
" \"version\": \"v1.0\"\n",
" },\n",
" hyperparameters={\n",
" \"learning_rate\": 0.5,\n",
" \"layers\": [10, 3, 1],\n",
" \"early_stop\": True\n",
" },\n",
" version=\"v0.0.1\",\n",
" labels={\"mylabel\": \"l1\"}))\n",
"metrics = exec.log_output(\n",
" metadata.Metrics(\n",
" name=\"MNIST-evaluation\",\n",
" description=\"validating the MNIST model to recognize handwritten digits\",\n",
" owner=\"someone@kubeflow.org\",\n",
" uri=\"gcs://my-bucket/mnist-eval.csv\",\n",
" data_set_id=data_set.id,\n",
" model_id=model.id,\n",
" metrics_type=metadata.Metrics.VALIDATION,\n",
" values={\"accuracy\": 0.95},\n",
" labels={\"mylabel\": \"l1\"}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List all the models in the workspace"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>workspace</th>\n",
" <th>run</th>\n",
" <th>create_time</th>\n",
" <th>description</th>\n",
" <th>model_type</th>\n",
" <th>name</th>\n",
" <th>owner</th>\n",
" <th>version</th>\n",
" <th>uri</th>\n",
" <th>training_framework</th>\n",
" <th>hyperparameters</th>\n",
" <th>labels</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8</td>\n",
" <td>ws1</td>\n",
" <td>run-2020-02-18T00:48:10.734939</td>\n",
" <td>2020-02-18T00:48:13.273533Z</td>\n",
" <td>model to recognize handwritten digits</td>\n",
" <td>neural network</td>\n",
" <td>MNIST</td>\n",
" <td>someone@kubeflow.org</td>\n",
" <td>v0.0.1</td>\n",
" <td>gcs://my-bucket/mnist</td>\n",
" <td>{'name': 'tensorflow', 'version': 'v1.0'}</td>\n",
" <td>{'learning_rate': 0.5, 'layers': [10, 3, 1], '...</td>\n",
" <td>{'mylabel': 'l1'}</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id workspace run create_time \\\n",
"0 8 ws1 run-2020-02-18T00:48:10.734939 2020-02-18T00:48:13.273533Z \n",
"\n",
" description model_type name \\\n",
"0 model to recognize handwritten digits neural network MNIST \n",
"\n",
" owner version uri \\\n",
"0 someone@kubeflow.org v0.0.1 gcs://my-bucket/mnist \n",
"\n",
" training_framework \\\n",
"0 {'name': 'tensorflow', 'version': 'v1.0'} \n",
"\n",
" hyperparameters labels \n",
"0 {'learning_rate': 0.5, 'layers': [10, 3, 1], '... {'mylabel': 'l1'} "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pandas.DataFrame.from_dict(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get basic lineage"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"model id is 8\n",
"\n"
]
}
],
"source": [
"print(\"model id is %s\\n\" % model.id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Find the execution that produces this model."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n"
]
}
],
"source": [
"output_events = ws1.client.list_events2(model.id).events\n",
"assert len(output_events) == 1\n",
"execution_id = output_events[0].execution_id\n",
"print(execution_id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Find all events related to that execution."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"All events related to this model:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>artifact_id</th>\n",
" <th>execution_id</th>\n",
" <th>path</th>\n",
" <th>type</th>\n",
" <th>milliseconds_since_epoch</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>None</td>\n",
" <td>INPUT</td>\n",
" <td>1581986893248</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" <td>None</td>\n",
" <td>OUTPUT</td>\n",
" <td>1581986893273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>None</td>\n",
" <td>OUTPUT</td>\n",
" <td>1581986893298</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" artifact_id execution_id path type milliseconds_since_epoch\n",
"0 7 3 None INPUT 1581986893248\n",
"1 8 3 None OUTPUT 1581986893273\n",
"2 9 3 None OUTPUT 1581986893298"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_events = ws1.client.list_events(execution_id).events\n",
"assert len(all_events) == 3\n",
"print(\"\\nAll events related to this model:\")\n",
"pandas.DataFrame.from_dict([e.to_dict() for e in all_events])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: ch06/Metadata.py
================================================
#!/usr/bin/env python
# coding: utf-8
# # Installation and imports
# In[1]:
get_ipython().system('pip install kfmd --upgrade --user')
get_ipython().system('pip install pandas --upgrade --user')
from kfmd import metadata
import pandas
from datetime import datetime
# Create a workspace, run and execution
# In[2]:
ws1 = metadata.Workspace(
    # Connect to metadata-service in namespace kubeflow in k8s cluster.
backend_url_prefix="metadata-service.kubeflow.svc.cluster.local:8080",
name="ws1",
description="a workspace for testing",
labels={"n1": "v1"})
r = metadata.Run(
workspace=ws1,
name="run-" + datetime.utcnow().isoformat("T"),
description="a run in ws_1",
)
exec = metadata.Execution(
name="execution" + datetime.utcnow().isoformat("T"),
workspace=ws1,
run=r,
description="execution example",
)
# Log data set, model and its evaluation
# In[3]:
data_set = exec.log_input(
metadata.DataSet(description="an example data",
name="mytable-dump",
owner="owner@my-company.org",
uri="file://path/to/dataset",
version="v1.0.0",
query="SELECT * FROM mytable"))
model = exec.log_output(
metadata.Model(name="MNIST",
description="model to recognize handwritten digits",
owner="someone@kubeflow.org",
uri="gcs://my-bucket/mnist",
model_type="neural network",
training_framework={
"name": "tensorflow",
"version": "v1.0"
},
hyperparameters={
"learning_rate": 0.5,
"layers": [10, 3, 1],
"early_stop": True
},
version="v0.0.1",
labels={"mylabel": "l1"}))
metrics = exec.log_output(
metadata.Metrics(
name="MNIST-evaluation",
description=
"validating the MNIST model to recognize handwritten digits",
owner="someone@kubeflow.org",
uri="gcs://my-bucket/mnist-eval.csv",
data_set_id=data_set.id,
model_id=model.id,
metrics_type=metadata.Metrics.VALIDATION,
values={"accuracy": 0.95},
labels={"mylabel": "l1"}))
# List all the models in the workspace
# In[4]:
pandas.DataFrame.from_dict(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME))
# Get basic lineage
# In[5]:
print("model id is %s\n" % model.id)
# Find the execution that produces this model.
# In[6]:
output_events = ws1.client.list_events2(model.id).events
assert len(output_events) == 1
execution_id = output_events[0].execution_id
print(execution_id)
# Find all events related to that execution.
# In[7]:
all_events = ws1.client.list_events(execution_id).events
assert len(all_events) == 3
print("\nAll events related to this model:")
pandas.DataFrame.from_dict([e.to_dict() for e in all_events])
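# A sketch of the same listing for the other artifact types, assuming that
# DataSet and Metrics expose ARTIFACT_TYPE_NAME just as Model does above.
datasets = pandas.DataFrame.from_dict(
    ws1.list(metadata.DataSet.ARTIFACT_TYPE_NAME))
evaluations = pandas.DataFrame.from_dict(
    ws1.list(metadata.Metrics.ARTIFACT_TYPE_NAME))
print("datasets: %d, evaluations: %d" % (len(datasets), len(evaluations)))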
# In[ ]:
================================================
FILE: ch06/docker/Dockerfile
================================================
# from https://github.com/flmu/mlflow-tracking-server
FROM python:3.7
RUN pip3 install --upgrade pip && \
pip3 install mlflow --upgrade && \
pip3 install awscli --upgrade && \
pip3 install boto3 --upgrade
ENV PORT 5000
ENV AWS_BUCKET bucket
ENV AWS_ACCESS_KEY_ID aws_id
ENV AWS_SECRET_ACCESS_KEY aws_key
ENV FILE_DIR /tmp/mlflow
RUN mkdir -p /opt/mlflow
COPY run.sh /opt/mlflow
RUN chmod -R 777 /opt/mlflow/
ENTRYPOINT ["/opt/mlflow/run.sh"]
================================================
FILE: ch06/docker/build.sh
================================================
#!/bin/bash
img='lightbend/mlflow'
tag='0.1'
docker build -t $img:$tag .
================================================
FILE: ch06/docker/run.sh
================================================
#!/bin/sh
set -e
if [ -z "${AWS_BUCKET}" ]; then
echo >&2 "AWS_BUCKET must be set"
exit 1
fi
if [ -z "${AWS_ACCESS_KEY_ID}" ]; then
echo >&2 "AWS_ACCESS_KEY_ID must be set"
exit 1
fi
if [ -z "${AWS_SECRET_ACCESS_KEY}" ]; then
echo >&2 "AWS_SECRET_ACCESS_KEY must be set"
exit 1
fi
mkdir -p "${FILE_DIR}"
mlflow server \
--backend-store-uri "file://$FILE_DIR" \
--default-artifact-root "s3://$AWS_BUCKET/mlflow/artifacts" \
--host 0.0.0.0 \
--port "$PORT"
================================================
FILE: ch06/install/mlflowchart/.helmignore
================================================
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
================================================
FILE: ch06/install/mlflowchart/Chart.yaml
================================================
apiVersion: v1
appVersion: 0.1
description: MLflow tracking server
maintainers:
- name: Boris Lublinsky
name: mlflowchart
version: 0.1
================================================
FILE: ch06/install/mlflowchart/templates/NOTES.txt
================================================
The MLflow tracking server is installed.
================================================
FILE: ch06/install/mlflowchart/templates/_helpers.tpl
================================================
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "modelserverchart.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
*/}}
{{- define "modelserverchart.fullname" -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
================================================
FILE: ch06/install/mlflowchart/templates/mlflow.yaml
================================================
apiVersion: apps/v1
kind: Deployment
metadata:
namespace: kubeflow
name: mlflowserver
labels:
app: mlflowserver
spec:
replicas: 1
selector:
matchLabels:
app: mlflowserver
strategy:
type: RollingUpdate
template:
metadata:
labels:
app: mlflowserver
spec:
containers:
- name: server
image: "{{ .Values.image.server }}:{{ .Values.image.version }}"
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
ports:
- containerPort: 5000
name: serving
protocol: TCP
env:
- name: "MLFLOW_S3_ENDPOINT_URL"
value: "http://minio-service.kubeflow.svc.cluster.local:9000"
- name: "AWS_ACCESS_KEY_ID"
valueFrom: { secretKeyRef: { name: "minioaccess", key: "AWS_ACCESS_KEY_ID" } }
- name: "AWS_SECRET_ACCESS_KEY"
valueFrom: { secretKeyRef: { name: "minioaccess", key: "AWS_SECRET_ACCESS_KEY" } }
- name: "AWS_BUCKET"
value: "mlflow"
volumes:
- name: secret-volume
secret:
secretName: minioaccess
---
apiVersion: v1
kind: Service
metadata:
namespace: kubeflow
name: mlflowserver
spec:
selector:
app: mlflowserver
ports:
- protocol: TCP
port: 5000
targetPort: 5000
---
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: mlflow-server
namespace: kubeflow
spec:
gateways:
- kubeflow-gateway
hosts:
- '*'
http:
- match:
- uri:
prefix: /mlflow/
rewrite:
uri: /
route:
- destination:
host: mlflowserver.kubeflow.svc.cluster.local
port:
number: 5000
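# To reach this server from inside the cluster (a sketch), point the MLflow
# client at the Service defined above, e.g. in Python:
#   import mlflow
#   mlflow.set_tracking_uri("http://mlflowserver.kubeflow.svc.cluster.local:5000")
# From outside the cluster, the VirtualService exposes it under the /mlflow/ prefix.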
================================================
FILE: ch06/install/mlflowchart/values.yaml
================================================
# Docker image settings for the MLflow tracking server
image:
server: lightbend/mlflow
pullPolicy: Always
version: 0.1
================================================
FILE: ch10/experiment.yaml
================================================
Name: random-example
Namespace: kubeflow
Labels: controller-tools.k8s.io=1.0
Annotations: <none>
API Version: kubeflow.org/v1alpha3
Kind: Experiment
Metadata:
Creation Timestamp: 2019-12-22T22:53:25Z
Finalizers:
update-prometheus-metrics
Generation: 2
Resource Version: 720692
Self Link: /apis/kubeflow.org/v1alpha3/namespaces/kubeflow/experiments/random-example
UID: dc6bc15a-250d-11ea-8cae-42010a80010f
Spec:
Algorithm:
Algorithm Name: random
Algorithm Settings: <nil>
Max Failed Trial Count: 3
Max Trial Count: 12
Metrics Collector Spec:
Collector:
Kind: StdOut
Objective:
Additional Metric Names:
accuracy
Goal: 0.99
Objective Metric Name: Validation-accuracy
Type: maximize
Parallel Trial Count: 3
Parameters:
Feasible Space:
Max: 0.03
Min: 0.01
Name: --lr
Parameter Type: double
Feasible Space:
Max: 5
Min: 2
Name: --num-layers
Parameter Type: int
Feasible Space:
List:
sgd
adam
ftrl
Name: --optimizer
Parameter Type: categorical
Trial Template:
Go Template:
Raw Template: apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.Trial}}
image: docker.io/kubeflowkatib/mxnet-mnist-example
command:
- "python"
- "/mxnet/example/image-classification/train_mnist.py"
- "--batch-size=64"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
Status:
Conditions:
Last Transition Time: 2019-12-22T22:53:25Z
Last Update Time: 2019-12-22T22:53:25Z
Message: Experiment is created
Reason: ExperimentCreated
Status: True
Type: Created
Last Transition Time: 2019-12-22T22:55:10Z
Last Update Time: 2019-12-22T22:55:10Z
Message: Experiment is running
Reason: ExperimentRunning
Status: True
Type: Running
Current Optimal Trial:
Observation:
Metrics:
Name: Validation-accuracy
Value: 0.981091
Parameter Assignments:
Name: --lr
Value: 0.025139701133432946
Name: --num-layers
Value: 4
Name: --optimizer
Value: sgd
Start Time: 2019-12-22T22:53:25Z
Trials: 12
Trials Running: 2
Trials Succeeded: 10
Events:                      <none>
================================================
FILE: ch10/hptuning.py
================================================
# Initialize search space
# Initialize model
while not objective_reached and not budget_exhausted:
# Obtain new hyperparameters
suggestion = GetSuggestions()
# Run trial with new hyperparameters; collect metrics
metrics = RunTrial(suggestion)
# Report metrics
Report(metrics)
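
# A self-contained sketch of the loop above as concrete random search over a
# single learning-rate parameter; the quadratic stand-in objective and the
# trial budget of 12 are illustrative, not Katib's actual internals.
import random

def run_trial(lr):
    # Stand-in for a real training job: the metric peaks at lr == 0.025.
    return 1.0 - 1000 * (lr - 0.025) ** 2

best_metric = 0.0
for _ in range(12):  # budget of 12 trials, matching random.yaml
    suggestion = random.uniform(0.01, 0.03)  # sample the feasible space
    metric = run_trial(suggestion)           # run trial; collect the metric
    best_metric = max(best_metric, metric)   # report/track the best result
    if best_metric >= 0.99:                  # objective reached
        break
print("best objective:", best_metric)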
================================================
FILE: ch10/random.yaml
================================================
apiVersion: "kubeflow.org/v1alpha3"
kind: Experiment
metadata:
namespace: kubeflow
labels:
controller-tools.k8s.io: "1.0"
name: random-example
spec:
objective:
type: maximize
goal: 0.99
objectiveMetricName: Validation-accuracy
additionalMetricNames:
- Train-accuracy
algorithm:
algorithmName: random
parallelTrialCount: 3
maxTrialCount: 12
maxFailedTrialCount: 3
parameters:
- name: --lr
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.03"
- name: --num-layers
parameterType: int
feasibleSpace:
min: "2"
max: "5"
- name: --optimizer
parameterType: categorical
feasibleSpace:
list:
- sgd
- adam
- ftrl
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.Trial}}
image: docker.io/kubeflowkatib/mxnet-mnist
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
            restartPolicy: Never
================================================
FILE: ch2/Dockerfile
================================================
FROM gcr.io/kubeflow-images-public/tensorflow-2.1.0-notebook-cpu:1.0.0
================================================
FILE: ch2/build-and-push.sh
================================================
#!/bin/bash
#tag::buildandpush[]
IMAGE="${CONTAINER_REGISTRY}/kubeflow/test:v1"
docker build -t "${IMAGE}" -f Dockerfile .
docker push "${IMAGE}"
#end::buildandpush[]
================================================
FILE: ch2/query-endpoint.py
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#tag::scriptSetup[]
import requests
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from matplotlib import pyplot as plt
def download_mnist():
return input_data.read_data_sets("MNIST_data/", one_hot=True)
def gen_image(arr):
two_d = (np.reshape(arr, (28, 28)) * 255).astype(np.uint8)
plt.imshow(two_d, cmap=plt.cm.gray_r, interpolation='nearest')
return plt
#end::scriptSetup[]
AMBASSADOR_API_IP = "10.53.148.167:30134"
#tag::scriptGuts[]
mnist = download_mnist()
batch_xs, batch_ys = mnist.train.next_batch(1)
chosen = 0
gen_image(batch_xs[chosen]).show()
data = batch_xs[chosen].reshape((1, 784))
features = ["X" + str(i + 1) for i in range(0, 784)]
request = {"data": {"names": features, "ndarray": data.tolist()}}
deploymentName = "mnist-classifier"
uri = "http://" + AMBASSADOR_API_IP + "/seldon/" + \
deploymentName + "/api/v0.1/predictions"
response = requests.post(uri, json=request)
#end::scriptGuts[]
print(response.status_code)
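# A sketch of decoding the reply, assuming the standard Seldon prediction
# response shape: {"data": {"names": [...], "ndarray": [[...]]}}.
if response.status_code == 200:
    scores = response.json()["data"]["ndarray"][0]
    print("predicted digit:", int(np.argmax(scores)))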
================================================
FILE: ch2_seldon_examples/pipeline_role.yaml
================================================
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: kubeflow
name: pipeline-runner
rules:
- apiGroups: ["machinelearning.seldon.io"]
resources: ["seldondeployments"]
verbs: ["*"]
================================================
FILE: ch2_seldon_examples/pipeline_rolebinding.yaml
================================================
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: pipeline-runner
namespace: kubeflow
subjects:
- kind: ServiceAccount
name: pipeline-runner
namespace: kubeflow
roleRef:
kind: Role
name: pipeline-runner
apiGroup: rbac.authorization.k8s.io
================================================
FILE: ch2_seldon_examples/pv-claim.yaml
================================================
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: "nfs-1"
spec:
storageClassName: manual
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 3Gi
================================================
FILE: ch2_seldon_examples/pv-volume.yaml
================================================
kind: PersistentVolume
apiVersion: v1
metadata:
name: task-pv-volume
labels:
type: local
spec:
storageClassName: manual
capacity:
storage: 10Gi
accessModes:
- ReadWriteOnce
hostPath:
path: "/mnt/data"
================================================
FILE: ch2_seldon_examples/request_example.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting matplotlib\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/57/4f/dd381ecf6c6ab9bcdaa8ea912e866dedc6e696756156d8ecc087e20817e2/matplotlib-3.1.1-cp36-cp36m-manylinux1_x86_64.whl (13.1MB)\n",
"\u001b[K 100% |████████████████████████████████| 13.1MB 2.7MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.6/site-packages (from matplotlib) (2.8.0)\n",
"Collecting cycler>=0.10 (from matplotlib)\n",
" Downloading https://files.pythonhosted.org/packages/f7/d2/e07d3ebb2bd7af696440ce7e754c59dd546ffe1bbe732c8ab68b9c834e61/cycler-0.10.0-py2.py3-none-any.whl\n",
"Collecting kiwisolver>=1.0.1 (from matplotlib)\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/f8/a1/5742b56282449b1c0968197f63eae486eca2c35dcd334bab75ad524e0de1/kiwisolver-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (90kB)\n",
"\u001b[K 100% |████████████████████████████████| 92kB 32.5MB/s ta 0:00:01\n",
"\u001b[?25hCollecting pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 (from matplotlib)\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/11/fa/0160cd525c62d7abd076a070ff02b2b94de589f1a9789774f17d7c54058e/pyparsing-2.4.2-py2.py3-none-any.whl (65kB)\n",
"\u001b[K 100% |████████████████████████████████| 71kB 25.6MB/s ta 0:00:01\n",
"\u001b[?25hRequirement already satisfied: numpy>=1.11 in /opt/conda/lib/python3.6/site-packages (from matplotlib) (1.16.2)\n",
"Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.6/site-packages (from python-dateutil>=2.1->matplotlib) (1.12.0)\n",
"Requirement already satisfied: setuptools in /opt/conda/lib/python3.6/site-packages (from kiwisolver>=1.0.1->matplotlib) (40.9.0)\n",
"Installing collected packages: cycler, kiwisolver, pyparsing, matplotlib\n",
"Successfully installed cycler-0.10.0 kiwisolver-1.1.0 matplotlib-3.1.1 pyparsing-2.4.2\n",
"\u001b[33mYou are using pip version 19.0.1, however version 19.2.3 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
"source": [
"!pip install matplotlib"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import numpy as np\n",
"\n",
"from tensorflow.examples.tutorials.mnist import input_data\n",
"from matplotlib import pyplot as plt\n",
"\n",
"\n",
"def download_mnist():\n",
" return input_data.read_data_sets(\"MNIST_data/\", one_hot = True)\n",
"\n",
"def gen_image(arr):\n",
" two_d = (np.reshape(arr, (28, 28)) * 255).astype(np.uint8)\n",
" plt.imshow(two_d,cmap=plt.cm.gray_r, interpolation='nearest')\n",
" return plt\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From <ipython-input-3-0613226129c0>:9: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n",
"WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please write your own downloading logic.\n",
"WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:252: _internal_retry.<locals>.wrap.<locals>.wrapped_fn (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use urllib or similar directly.\n",
"Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\n",
"WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use tf.data to implement this functionality.\n",
"Extracting MNIST_data/train-images-idx3-ubyte.gz\n",
"Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\n",
"WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use tf.data to implement this functionality.\n",
"Extracting MNIST_data/train-labels-idx1-ubyte.gz\n",
"WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use tf.one_hot on tensors.\n",
"Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\n",
"Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n",
"Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\n",
"Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n",
"WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n"
]
}
],
"source": [
"mnist = download_mnist()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAANMklEQVR4nO3dXaxV9ZnH8d9Ppr0REmE4ORDAgakYo2Ok5EhMahonZIgvIdgYTblATMxQXyCtaeIYJ1ovvMAJ0BQzklAlpaRDbWwVYohTB5uY3hCPBoUjaX0JBghyDhqiqFiVZy7Osjni2Wsf9lr7RZ7vJznZe69nrb2erPBj7b3+e++/I0IAzn3ndbsBAJ1B2IEkCDuQBGEHkiDsQBL/0MmdTZ8+PebOndvJXQKpHDx4UMePH/d4tUpht32tpF9ImiTp8YhYW7b+3LlzNTg4WGWXAEoMDAw0rLX8Mt72JEn/Lek6SZdKWm770lafD0B7VXnPvkjSmxHxdkT8TdJvJS2rpy0AdasS9lmSDo15fLhY9hW2V9ketD04MjJSYXcAqmj71fiI2BwRAxEx0NfX1+7dAWigStiPSJoz5vHsYhmAHlQl7C9Jmm97nu1vS/qhpJ31tAWgbi0PvUXE57ZXS/pfjQ69bYmIodo6A1CrSuPsEbFL0q6aegHQRnxcFkiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5BEpSmbbR+U9KGkLyR9HhEDdTQFoH6Vwl7414g4XsPzAGgjXsYDSVQNe0j6o+2Xba8abwXbq2wP2h4cGRmpuDsAraoa9qsjYqGk6yTdbfv7Z64QEZsjYiAiBvr6+iruDkCrKoU9Io4Ut8OSnpa0qI6mANSv5bDbPt/2lC/vS1oiaX9djQGoV5Wr8f2Snrb95fP8T0Q8V0tXAGrXctgj4m1JV9TYC4A2YugNSIKwA0kQdiAJwg4kQdiBJOr4Igy+wbZt21Za/+STTzrUydl74IEHSuvr1q1rWFuxYkXd7fQ8zuxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kATj7OeANWvWNKzt2bOndNu9e/eW1j/77LPSen9/f8vbnz59unTbEydOlNabKb5+jQJndiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2Djh27Fhp/eGHH670/Dt27GhYO3ToUKXnvuOOO0rrt912W2n9o48+aljbtGlT6bZPPfVUaX3+/Pml9SuvvLK0ng1ndiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2Grz11lul9WXLlpXWh4aGKu1/8uTJDWu33npr6bbr168vrU+bNq20ft555eeLxx9/vGFtcHCwdNvLLrustP7cc+UzhM+ePbu0nk3TM7vtLbaHbe8fs2ya7edtv1HcTm1vmwCqmsjL+F9JuvaMZfdJ2h0R8yXtLh4D6GFNwx4RL0p6/4zFyyRtLe5vlXRjzX0BqFmrF+j6I+Jocf9dSQ1/iMz2KtuDtgdHRkZa3B2AqipfjY+IkBQl9c0RMRARA319fVV3B6BFrYb9mO2ZklTcDtfXEoB2aDXsOyWtLO6vlNT4O5YAekLTcXbb2yVdI2m67cOSfiZpraTf2b5d0juSbmlnk72ubJxbkmbNmlVarzrO/sgjjzSs3XXXXZWeu5n33nuvtL5hw4aGtZMnT5Zue/PNN5fWGUc/O03DHhHLG5QW19wLgDbi47JAEoQdSIKwA0kQdiAJwg4kwVdca9Bs2uKNGzeW1i+55JJK+2/2k8rt9Nhjj5XWDxw40LDW7Ou3N9xwQ0s9YXyc2YEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcbZO+Ciiy4qra9Zs6a0/uijj9bZzlk5depUab3Z13NnzJjRsHbnnXeWbnvVVVeV1nF2OLMDSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKMs3fApEmTSuurV68urS9durS0vnDhwrPuaaJOnDhRWn/yySdL60uWLGlYYxy9szizA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EASjLP3gIsvvrhSvZ2eeeaZru0b9Wp6Zre9xfaw7f1jlj1k+4jtvcXf9e1tE0BVE3kZ/ytJ146z/OcRsaD421VvWwDq1jTsEfGipPc70AuANqpygW617deKl/lTG61ke5XtQduDIyMjFXYHoIpWw75J0nckLZB0VNL6RitGxOaIGIiIgb6+vhZ3B6CqlsIeEcci4ouIOC3pl5IW1dsWgLq1FHbbM8c8/IGk/Y3WBdAbmo6z294u6RpJ020flvQzSdfYXiApJB2U9KM29ogu2rWrfKDlnnvuKa0/+OCDdbaDCpqGPSKWj7P4iTb0AqCN+LgskARhB5Ig7EAShB1IgrADSfAV1+SGh4dL659++mlpffLkyaX1Cy644Kx7QntwZgeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJBhnT+7yyy8vrX/88cel9XvvvbfOdtBGnNmBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnG2VGq2ffVFy9e3KFOUBVndiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2c9z27dtL6x988EFpfcaMGXW2gy5qema3Pcf2n2y/bnvI9o+L5dNsP2/7jeJ2avvbBdCqibyM/1zSTyPiUklXSbrb9qWS7pO0OyLmS9pdPAbQo5qGPSKORsQrxf0PJR2QNEvSMklbi9W2SrqxXU0CqO6sLtDZnivpu5L2SOqPiKNF6V1J/Q22WWV70PbgyMhIhVYBVDHhsNueLOn3kn4SEV+5qhMRISnG2y4iNkfEQEQM9PX1VWoWQOsmFHbb39Jo0H8TEX8oFh+zPbOoz5RUPh0ogK5qOvRm25KekHQgIjaMKe2UtFLS2uJ2R1s6RCVDQ0Ol9VOnTpXWN27cWGc76KKJjLN/T9IKSfts7y2W3a/RkP/O9u2S3pF0S3taBFCHpmGPiD9LcoMyv1wAfEPwcVkgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Lgp6TPAWvXrm1Y27lzZ+m2CxYsKK1fccUVLfWE3sOZHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSYJz9HPDCCy80rO3bt6/Sc7/66qul9QsvvLDS86NzOLMDSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBITmZ99jqRfS+qXFJI2R8QvbD8k6d8ljRSr3h8Ru9rVKBqbN29e255727ZtpfWlS5e2bd+o10Q+VPO5pJ9GxCu2p0h62fbzRe3nEbGufe0BqMtE5mc/Kulocf9D2wckzWp3YwDqdVbv2W3PlfRdSXuKRattv2Z7i+2pDbZZZXvQ9uDIyMh4qwDogAmH3fZkSb+X9JOI+EDSJknfkbRAo2f+9eNtFxGbI2IgIgb6+vpqaBlAK
yYUdtvf0mjQfxMRf5CkiDgWEV9ExGlJv5S0qH1tAqiqadhtW9ITkg5ExIYxy2eOWe0HkvbX3x6Aukzkavz3JK2QtM/23mLZ/ZKW216g0eG4g5J+1JYO0dS6dY0HRI4cOVK67eLFi0vrN910U0s9ofdM5Gr8nyV5nBJj6sA3CJ+gA5Ig7EAShB1IgrADSRB2IAnCDiTBT0mfA6ZMmdKw9uyzz3awE/QyzuxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kIQjonM7s0ckvTNm0XRJxzvWwNnp1d56tS+J3lpVZ2//FBHj/v5bR8P+tZ3bgxEx0LUGSvRqb73al0RvrepUb7yMB5Ig7EAS3Q775i7vv0yv9tarfUn01qqO9NbV9+wAOqfbZ3YAHULYgSS6Enbb19r+i+03bd/XjR4asX3Q9j7be20PdrmXLbaHbe8fs2ya7edtv1HcjjvHXpd6e8j2keLY7bV9fZd6m2P7T7Zftz1k+8fF8q4eu5K+OnLcOv6e3fYkSX+V9G+SDkt6SdLyiHi9o400YPugpIGI6PoHMGx/X9JJSb+OiH8plv2XpPcjYm3xH+XUiPiPHuntIUknuz2NdzFb0cyx04xLulHSberisSvp6xZ14Lh148y+SNKbEfF2RPxN0m8lLetCHz0vIl6U9P4Zi5dJ2lrc36rRfywd16C3nhARRyPileL+h5K+nGa8q8eupK+O6EbYZ0k6NObxYfXWfO8h6Y+2X7a9qtvNjKM/Io4W99+V1N/NZsbRdBrvTjpjmvGeOXatTH9eFRfovu7qiFgo6TpJdxcvV3tSjL4H66Wx0wlN490p40wz/nfdPHatTn9eVTfCfkTSnDGPZxfLekJEHCluhyU9rd6bivrYlzPoFrfDXe7n73ppGu/xphlXDxy7bk5/3o2wvyRpvu15tr8t6YeSdnahj6+xfX5x4US2z5e0RL03FfVOSSuL+ysl7ehiL1/RK9N4N5pmXF0+dl2f/jwiOv4n6XqNXpF/S9J/dqOHBn39s6RXi7+hbvcmabtGX9Z9ptFrG7dL+kdJuyW9Ien/JE3rod62Sdon6TWNBmtml3q7WqMv0V+TtLf4u77bx66kr44cNz4uCyTBBTogCcIOJEHYgSQIO5AEYQeSIOxAEoQdSOL/AQe88PwDu2A0AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"401\n"
]
}
],
"source": [
"batch_xs, batch_ys = mnist.train.next_batch(1)\n",
"chosen=0\n",
"gen_image(batch_xs[chosen]).show()\n",
"data = batch_xs[chosen].reshape((1,784))\n",
"features = [\"X\"+str(i+1) for i in range (0,784)]\n",
"request = {\"data\":{\"names\":features,\"ndarray\":data.tolist()}}\n",
"deploymentName = \"mnist-classifier\"\n",
"uri = \"http://istio-ingressgateway.istio-system.svc.cluster.local/seldon/\"+deploymentName+\"/api/v0.1/predictions\"\n",
"\n",
"response = requests.post(\n",
" uri,\n",
" json=request)\n",
"\n",
"print(response.status_code)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Origin authentication failed.\n"
]
}
],
"source": [
"print(response.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: ch2_seldon_examples/run_example.sh
================================================
#!/bin/bash
#tag::buildPipeline[]
dsl-compile --py train_pipeline.py --output job.yaml
#end::buildPipeline[]
#tag::connectToWebUI[]
# If you're on minikube and not using a loadbalancer:
minikube service --url -n istio-system istio-ingressgateway
# If you're on GCP: https://<kf_app_name>.endpoints.<gcp_project_name>.cloud.goog/
# If you're on vanilla K8s
INGRESS_HOST=$(kubectl -n istio-system get service istio-ingressgateway \
-o jsonpath='{.status.loadBalancer.ingress[0].ip}')
export INGRESS_HOST
INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway \
-o jsonpath='{.spec.ports[?(@.name=="http2")].port}')
export INGRESS_PORT
SECURE_INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway \
-o jsonpath='{.spec.ports[?(@.name=="https")].port}')
export SECURE_INGRESS_PORT
kubectl get svc istio-ingressgateway -n istio-system
#end::connectToWebUI[]
================================================
FILE: ch2_seldon_examples/setup_example.sh
================================================
#!/bin/bash
set -ex
echo "Setting up example"
unset ch2_example_path
ch2_example_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "Using path ${ch2_example_path} for our example path"
example_path=$(dirname "${ch2_example_path}")
#tag::generate_kf_app_p1[]
# Pick the correct config file for your platform from
# https://github.com/kubeflow/manifests/tree/[version]/kfdef
# You can download & edit the configuration at this point if you need to.
# For generic k8s with istio:
MANIFEST_BRANCH=${MANIFEST_BRANCH:-v1.0-branch}
export MANIFEST_BRANCH
MANIFEST_VERSION=${MANIFEST_VERSION:-v1.0.1}
export MANIFEST_VERSION
KF_PROJECT_NAME=${KF_PROJECT_NAME:-hello-kf-${PLATFORM}}
export KF_PROJECT_NAME
mkdir "${KF_PROJECT_NAME}"
pushd "${KF_PROJECT_NAME}"
manifest_root=https://raw.githubusercontent.com/kubeflow/manifests/
# On most environments this will create a "vanilla" Kubeflow install using Istio.
KFDEF=${manifest_root}${MANIFEST_BRANCH}/kfdef/kfctl_k8s_istio.${MANIFEST_VERSION}.yaml
#end::generate_kf_app_p1[]
# On GCP this will create a cluster with basic authentication
if [ "$PLATFORM" == "gcp" ]; then
KFDEF=${manifest_root}${MANIFEST_BRANCH}/kfdef/kfctl_gcp_iap.${MANIFEST_VERSION}.yaml
# Temp hack
cp "${example_path}/kfctl_gcp_iap.v1.0.1.yaml" ./
KFDEF=./kfctl_gcp_iap.v1.0.1.yaml
# Set up IAP
# TODO(holden)
# Set up environment variables for GCP
export PROJECT=${PROJECT:-"<your GCP project name>"}
gcloud config set project "${PROJECT}"
export ZONE=${ZONE:-"<your GCP zone>"}
gcloud config set compute/zone "${ZONE}"
fi
pwd
#tag::generate_kf_app_p2[]
kfctl apply -f $KFDEF -V
echo $?
popd
#end::generate_kf_app_p2[]
# TODO(trevor): what version/tag?
#tag::cloneSeldonExample[]
# Clone the base seldon example
git clone https://github.com/kubeflow/example-seldon
#end::cloneSeldonExample[]
================================================
FILE: ch2_seldon_examples/tf_mnist_no_seldon_pipeline.py
================================================
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Kubeflow Pipelines MNIST example
Run this script to compile pipeline
"""
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.onprem as onprem
gcs_or_pvc = 'PVC'
@dsl.pipeline(name='MNIST',
description='A pipeline to train and serve the MNIST example.')
def mnist_pipeline(gcs_bucket=None,
train_steps='200',
learning_rate='0.01',
batch_size='100'):
"""
Pipeline with three stages:
1. train an MNIST classifier
2. deploy a tf-serving instance to the cluster
3. deploy a web-ui to interact with it
"""
vop = None
volume = None
if gcs_or_pvc == "PVC":
vop = dsl.VolumeOp(name="create_pvc",
resource_name="nfs-1",
modes=dsl.VOLUME_MODE_RWO,
size="10G")
volume = vop.volume
train = dsl.ContainerOp(
name='train',
image=
'gcr.io/kubeflow-examples/mnist/model:v20190304-v0.2-176-g15d997b',
arguments=[
"/opt/model.py", "--tf-export-dir", gcs_bucket or "/mnt",
"--tf-train-steps", train_steps, "--tf-batch-size", batch_size,
"--tf-learning-rate", learning_rate
])
serve_args = [
'--model-export-path', gcs_bucket or "/mnt", '--server-name',
"mnist-service"
]
if gcs_or_pvc != 'GCS':
serve_args.extend(
['--cluster-name', "mnist-pipeline", '--pvc-name', volume])
serve = dsl.ContainerOp(
name='serve',
image='gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:'
'7775692adf28d6f79098e76e839986c9ee55dd61',
arguments=serve_args)
serve.after(train)
webui_args = [
'--image', 'gcr.io/kubeflow-examples/mnist/web-ui:'
'v20190304-v0.2-176-g15d997b-pipelines', '--name', 'web-ui',
'--container-port', '5000', '--service-port', '80', '--service-type',
"LoadBalancer"
]
web_ui = dsl.ContainerOp(
name='web-ui',
image='gcr.io/kubeflow-examples/mnist/deploy-service:latest',
arguments=webui_args)
web_ui.after(serve)
steps = [train, serve, web_ui]
for step in steps:
if gcs_or_pvc == 'GCS':
step.apply(gcp.use_gcp_secret('user-gcp-sa'))
else:
step.after(vop)
step.add_pvolumes({"/mnt": volume})
if __name__ == '__main__':
import kfp.compiler as compiler
compiler.Compiler().compile(mnist_pipeline, __file__ + '.tar.gz')
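    # A sketch of also submitting the compiled package programmatically,
    # mirroring the pattern in train_pipeline.py; it assumes a reachable
    # Kubeflow Pipelines endpoint (the experiment name is an example).
    import kfp
    client = kfp.Client()
    experiment = client.create_experiment('mnist-experiment')
    client.run_pipeline(experiment.id, 'mnist run', __file__ + '.tar.gz')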
================================================
FILE: ch2_seldon_examples/tiller_rbac.yaml
================================================
apiVersion: v1
kind: ServiceAccount
metadata:
name: tiller
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: tiller
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: ServiceAccount
name: tiller
namespace: kube-system
================================================
FILE: ch2_seldon_examples/train_pipeline.py
================================================
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.onprem as onprem
from string import Template
import json
@dsl.pipeline(name='Simple sci-kit KF Pipeline',
description='A simple end to end sci-kit seldon kf pipeline')
def mnist_train_pipeline(docker_org="index.docker.io/seldonio",
train_container_version="0.2",
serve_container_version="0.1"):
vop = dsl.VolumeOp(name="create_pvc",
resource_name="nfs-1",
modes=dsl.VOLUME_MODE_RWO,
size="10G")
volume = vop.volume
train = dsl.ContainerOp(
name='sk-train',
image=
f"{docker_org}/skmnistclassifier_trainer:{train_container_version}",
pvolumes={"/data": volume})
seldon_serving_json_template = Template("""
{
"apiVersion": "machinelearning.seldon.io/v1alpha2",
"kind": "SeldonDeployment",
"metadata": {
"labels": {
"app": "seldon"
},
"name": "mnist-classifier"
},
"spec": {
"annotations": {
"deployment_version": "v1",
"project_name": "MNIST Example"
},
"name": "mnist-classifier",
"predictors": [
{
"annotations": {
"predictor_version": "v1"
},
"componentSpecs": [
{
"spec": {
"containers": [
{
"image": "$dockerreposerving:$dockertagserving",
"imagePullPolicy": "Always",
"name": "mnist-classifier",
"volumeMounts": [
{
"mountPath": "/data",
"name": "persistent-storage"
}
]
}
],
"terminationGracePeriodSeconds": 1,
"volumes": [
{
"name": "persistent-storage",
"persistentVolumeClaim": {
"claimName": "$modelpvc"
}
}
]
}
}
],
"graph": {
"children": [],
"endpoint": {
"type": "REST"
},
"name": "mnist-classifier",
"type": "MODEL"
},
"name": "mnist-classifier",
"replicas": 1
}
]
}
}
""")
seldon_serving_json = seldon_serving_json_template.substitute({
'dockerreposerving':
f"{docker_org}/skmnistclassifier_runtime",
'dockertagserving':
str(serve_container_version),
'modelpvc':
vop.outputs["name"]
})
seldon_deployment = json.loads(seldon_serving_json)
serve = dsl.ResourceOp(
name='serve',
k8s_resource=seldon_deployment,
success_condition='status.state == Available').after(train)
# If we're called directly, create an experiment and run the pipeline
if __name__ == '__main__':
    import kfp
    import kfp.compiler as compiler
    pipeline_func = mnist_train_pipeline
    pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'
    compiler.Compiler().compile(pipeline_func, pipeline_filename)
    client = kfp.Client()  # assumes a reachable Kubeflow Pipelines endpoint
    experiment_name = "cheese"
    experiment = client.create_experiment(experiment_name)
    run_name = pipeline_func.__name__ + ' run'
    arguments = {}  # run with the pipeline's default parameters
    run_result = client.run_pipeline(experiment.id, run_name,
                                     pipeline_filename, arguments)
    print(run_result)
================================================
FILE: ch9/ctscans/DICOM Denoising Pipeline.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Collecting kfp\n",
" Downloading kfp-0.5.1.tar.gz (119 kB)\n",
"\u001b[K |████████████████████████████████| 119 kB 3.5 MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
"Requirement already satisfied: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
"Requirement already satisfied: kubernetes<12.0.0,>=8.0.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (10.0.1)\n",
"Requirement already satisfied: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
"Collecting requests_toolbelt>=0.8.0\n",
" Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)\n",
"\u001b[K |████████████████████████████████| 54 kB 4.0 MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from kfp) (1.2.2)\n",
"Collecting kfp-server-api<0.6.0,>=0.2.5\n",
" Downloading kfp-server-api-0.5.0.tar.gz (39 kB)\n",
"Requirement already satisfied: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
"Collecting tabulate\n",
" Downloading tabulate-0.8.7-py3-none-any.whl (24 kB)\n",
"Collecting click\n",
" Downloading click-7.1.2-py2.py3-none-any.whl (82 kB)\n",
"\u001b[K |████████████████████████████████| 82 kB 1.5 MB/s eta 0:00:01\n",
"\u001b[?25hCollecting Deprecated\n",
" Downloading Deprecated-1.2.9-py2.py3-none-any.whl (8.6 kB)\n",
"Collecting strip-hints\n",
" Downloading strip-hints-0.1.9.tar.gz (30 kB)\n",
"Requirement already satisfied: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
"Requirement already satisfied: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
"Requirement already satisfied: six>=1.9.0 in /usr/lib/python3/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (1.11.0)\n",
"Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (0.57.0)\n",
"Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (2.8.1)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (2.22.0)\n",
"Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (1.3.0)\n",
"Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (45.1.0)\n",
"Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (1.25.8)\n",
"Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (2019.11.28)\n",
"Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
"Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
"Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
"Requirement already satisfied: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
"Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
"Requirement already satisfied: wheel in /usr/lib/python3/dist-packages (from strip-hints->kfp) (0.30.0)\n",
"Requirement already satisfied: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes<12.0.0,>=8.0.0->kfp) (2.6)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes<12.0.0,>=8.0.0->kfp) (3.0.4)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<12.0.0,>=8.0.0->kfp) (3.1.0)\n",
"Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.6.1->kfp) (0.4.8)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
"Requirement already satisfied: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n",
"Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
"Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n",
"Building wheels for collected packages: kfp, kfp-server-api, strip-hints\n",
" Building wheel for kfp (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for kfp: filename=kfp-0.5.1-py3-none-any.whl size=163151 sha256=da5b540ae9834d37659146f0576997ffd8f7a7e2b305e1eb7b2a99dd4745930b\n",
" Stored in directory: /home/jovyan/.cache/pip/wheels/2f/26/f9/e3836cb6e6cabd63ef912304e18a852ac29cb870a4a0b85f98\n",
" Building wheel for kfp-server-api (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for kfp-server-api: filename=kfp_server_api-0.5.0-py3-none-any.whl size=106319 sha256=84f55948cc254c0f836dffdfd51574a828ae8a503a2ca9198acf7a27ca2aaea7\n",
" Stored in directory: /home/jovyan/.cache/pip/wheels/73/36/4e/bfe2efeeea4f74f04984ebe1d44136202b72191302f4760951\n",
" Building wheel for strip-hints (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for strip-hints: filename=strip_hints-0.1.9-py2.py3-none-any.whl size=24671 sha256=3bcfd573a91f5f4c46d23509ac3fee9a0cf351b414e00ed505a8f71d0e6a1141\n",
" Stored in directory: /home/jovyan/.cache/pip/wheels/21/6d/fa/7ed7c0560e1ef39ebabd5cc0241e7fca711660bae1ad752e2b\n",
"Successfully built kfp kfp-server-api strip-hints\n",
"Installing collected packages: requests-toolbelt, kfp-server-api, tabulate, click, Deprecated, strip-hints, kfp\n",
"\u001b[33m WARNING: The script tabulate is installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
"\u001b[33m WARNING: The script strip-hints is installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
"\u001b[33m WARNING: The scripts dsl-compile and kfp are installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
"Successfully installed Deprecated-1.2.9 click-7.1.2 kfp-0.5.1 kfp-server-api-0.5.0 requests-toolbelt-0.9.1 strip-hints-0.1.9 tabulate-0.8.7\n",
"\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.1 is available.\n",
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
"source": [
"!pip3 install kfp\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import kfp\n",
"import kubernetes"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"\n",
"container_manifest = {\n",
" \"apiVersion\": \"sparkoperator.k8s.io/v1beta2\",\n",
" \"kind\": \"SparkApplication\",\n",
" \"metadata\": {\n",
" \"name\": \"spark-app\",\n",
" \"namespace\": \"kubeflow\"\n",
" },\n",
" \"spec\": {\n",
" \"type\": \"Scala\",\n",
" \"mode\": \"cluster\",\n",
" \"image\": \"docker.io/rawkintrevo/covid-basis-vectors:0.2.0\",\n",
" \"imagePullPolicy\": \"Always\",\n",
" \"hadoopConf\": {\n",
" \"fs.gs.project.id\": \"kubeflow-hacky-hacky\",\n",
" \"fs.gs.system.bucket\": \"covid-dicoms\",\n",
" \"fs.gs.impl\" : \"com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem\",\n",
" \"google.cloud.auth.service.account.enable\": \"true\",\n",
" \"google.cloud.auth.service.account.json.keyfile\": \"/mnt/secrets/user-gcp-sa.json\",\n",
" },\n",
" \"mainClass\": \"org.rawkintrevo.covid.App\",\n",
" \"mainApplicationFile\": \"local:///covid-0.1-jar-with-dependencies.jar\", # See the Dockerfile\n",
" \"arguments\": [\"245\", \"15\", \"1\"],\n",
" \"sparkVersion\": \"2.4.5\",\n",
" \"restartPolicy\": {\n",
" \"type\": \"Never\"\n",
" },\n",
" \"driver\": {\n",
" \"cores\": 1,\n",
" \"secrets\": [\n",
" {\"name\": \"user-gcp-sa\",\n",
" \"path\": \"/mnt/secrets\",\n",
" \"secretType\": \"GCPServiceAccount\"\n",
" }\n",
" ],\n",
"\n",
" \"coreLimit\": \"1200m\",\n",
" \"memory\": \"512m\",\n",
" \"labels\": {\n",
" \"version\": \"2.4.5\",\n",
" },\n",
" \"serviceAccount\": \"spark-operatoroperator-sa\", # also try spark-operatoroperator-sa\n",
" },\n",
" \"executor\": {\n",
" \"cores\": 1,\n",
" \"secrets\": [\n",
" {\"name\": \"user-gcp-sa\",\n",
" \"path\": \"/mnt/secrets\",\n",
" \"secretType\": \"GCPServiceAccount\"\n",
" }\n",
" ],\n",
" \"instances\": 4,\n",
" \"memory\": \"4084m\"\n",
" },\n",
" \"labels\": {\n",
" \"version\": \"2.4.5\"\n",
" },\n",
"\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from kfp.gcp import use_gcp_secret\n",
"@kfp.dsl.pipeline(\n",
" name=\"Covid DICOM Pipe v2\",\n",
" description=\"Create Basis Vectors for Lung Images\"\n",
")\n",
"def covid_dicom_pipeline():\n",
" vop = kfp.dsl.VolumeOp(\n",
" name=\"requisition-PVC\",\n",
" resource_name=\"datapvc\",\n",
" size=\"20Gi\", #10 Gi blows up...\n",
" modes=kfp.dsl.VOLUME_MODE_RWO\n",
" )\n",
" step1 = kfp.dsl.ContainerOp(\n",
" name=\"download-dicom\",\n",
" image=\"rawkintrevo/download-dicom:0.0.0.4\",\n",
" command=[\"/run.sh\"],\n",
" pvolumes={\"/data\": vop.volume}\n",
" )\n",
" step2 = kfp.dsl.ContainerOp(\n",
" name=\"convert-dicoms-to-vectors\",\n",
" image=\"rawkintrevo/covid-prep-dicom:0.9.5\",\n",
" arguments=[\n",
" '--bucket_name', \"covid-dicoms\",\n",
" ],\n",
" command=[\"python\", \"/program.py\"],\n",
" pvolumes={\"/mnt/data\": step1.pvolume}\n",
" ).apply(kfp.gcp.use_gcp_secret(secret_name='user-gcp-sa'))\n",
" rop = kfp.dsl.ResourceOp(\n",
" name=\"calculate-basis-vectors\",\n",
" k8s_resource=container_manifest,\n",
" action=\"create\",\n",
" success_condition=\"status.applicationState.state == COMPLETED\"\n",
" ).after(step2)\n",
" pyviz = kfp.dsl.ContainerOp(\n",
" name=\"visualize-slice-of-dicom\",\n",
" image=\"rawkintrevo/visualize-dicom-output:0.0.11\",\n",
" command=[\"python\", \"/program.py\"],\n",
" arguments=[\n",
" '--bucket_name', \"covid-dicoms\",\n",
" ],\n",
" ).apply(kfp.gcp.use_gcp_secret(secret_name='user-gcp-sa')).after(rop)\n",
" \n",
"\n",
"kfp.compiler.Compiler().compile(covid_dicom_pipeline,\"dicom-pipeline-2.zip\")\n",
"client = kfp.Client()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Experiment link <a href=\"/pipeline/#/experiments/details/a7292089-5186-4e53-b0bb-9264dfbb9775\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run link <a href=\"/pipeline/#/runs/details/0f3f3d01-f6c4-4216-8e03-396c49fa040f\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"my_experiment = client.create_experiment(name='my-experiments')\n",
"my_run = client.run_pipeline(my_experiment.id, 'my-run1', 'dicom-pipeline-2.zip')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: ch9/ctscans/calculate-basis-vectors/Dockerfile
================================================
FROM gcr.io/spark-operator/spark:v2.4.5-gcs-prometheus
COPY target/covid-0.1-jar-with-dependencies.jar /
## Someday soon we'll live in a world where this hack is unnecessary
# https://github.com/GoogleCloudDataproc/hadoop-connectors/issues/323
# Swap the stale GCS connector for a pinned release at build time
RUN rm /opt/spark/jars/gcs-connector-latest-hadoop2.jar
ADD https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar $SPARK_HOME/jars/
ENTRYPOINT ["/opt/entrypoint.sh"]
================================================
FILE: ch9/ctscans/calculate-basis-vectors/build-component.sh
================================================
#!/usr/bin/env bash
image_name=rawkintrevo/covid-basis-vectors # Specify the image name here
image_tag=0.2.0
full_image_name=${image_name}:${image_tag}
cd "$(dirname "$0")"
docker build -t "${full_image_name}" .
docker push "$full_image_name"
================================================
FILE: ch9/ctscans/calculate-basis-vectors/pom.xml
================================================
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.rawkintrevo</groupId>
<artifactId>covid</artifactId>
<version>0.1</version>
<inceptionYear>2020</inceptionYear>
<properties>
<scala.version>2.11.12</scala.version>
</properties>
<repositories>
<repository>
<id>scala-tools.org</id>
<name>Scala-Tools Maven2 Repository</name>
<url>http://scala-tools.org/repo-releases</url>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>scala-tools.org</id>
<name>Scala-Tools Maven2 Repository</name>
<url>http://scala-tools.org/repo-releases</url>
</pluginRepository>
</pluginRepositories>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.specs</groupId>
<artifactId>specs</artifactId>
<version>1.2.5</version>
<scope>test</scope>
</dependency>
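<!-- Mahout's math, HDFS, and Spark modules provide the distributed stochastic SVD (dssvd) used by App.scala -->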
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core_2.11</artifactId>
<version>14.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-hdfs_2.11</artifactId>
<version>14.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-spark_2.11</artifactId>
<version>14.1-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
<args>
<arg>-target:jvm-1.8</arg>
</args>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<configuration>
<downloadSources>true</downloadSources>
<buildcommands>
<buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
</buildcommands>
<additionalProjectnatures>
<projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
</additionalProjectnatures>
<classpathContainers>
<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
<classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
</classpathContainers>
</configuration>
</plugin>
<!-- This builds the fat JAR -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.rawkintrevo.covid.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<reporting>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</reporting>
</project>
================================================
FILE: ch9/ctscans/calculate-basis-vectors/src/main/scala/org/rawkintrevo/covid/App.scala
================================================
package org.rawkintrevo.covid
import org.apache.mahout.math._
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.math.scalabindings.RLikeOps._
import org.apache.mahout.math.drm.RLikeDrmOps._
import org.apache.mahout.sparkbindings._
import org.apache.mahout.math.decompositions._
import org.apache.mahout.math.scalabindings.MahoutCollections._
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.SparkFiles
object App {
def main(args: Array[String]) {
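// Mahout on Spark requires Kryo serialization with the Mahout Kryo registrator, configured below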
val conf:SparkConf = new SparkConf()
.setAppName("Calculate CT Scan Basis Vectors")
.set("spark.kryo.referenceTracking", "false")
.set("spark.kryo.registrator", "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator")
.set("spark.kryoserializer.buffer", "32")
.set("spark.kryoserializer.buffer.max" , "600m")
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
//create spark context object
val sc = new SparkContext(conf)
implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)
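// sc2sdc wraps the SparkContext so Mahout's distributed operations (drmWrap, dssvd) can run on it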
val pathToMatrix = "gs://covid-dicoms/s.csv" // todo make this an arg.
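// Each CSV line is one row of the voxel matrix; zipWithIndex supplies the integer row keys a DRM needs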
val voxelRDD:DrmRdd[Int] = sc.textFile(pathToMatrix)
.map(s => dvec( s.split(",")
.map(f => f.toDouble)))
.zipWithIndex
.map(o => (o._2.toInt, o._1))
val voxelDRM = drmWrap(voxelRDD)
// k, p, q should all be cli parameters
// k is rank of the output e.g. the number of eigenfaces we want out.
// p is oversampling parameter,
// and q is the number of additional power iterations
// Read https://mahout.apache.org/users/dim-reduction/ssvd.html
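// Hypothetical example values: k=150, p=15, q=1 would keep 150 basis vectors,
// oversample by 15 columns, and run one extra power iteration.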
val k = args(0).toInt
val p = args(1).toInt
val q = args(2).toInt
val(drmU, drmV, s) = dssvd(voxelDRM.t, k, p, q)
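// Persist U, V, and the singular values; the visualize-basis-vectors step reads them back from the bucket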
drmV.checkpoint().rdd.saveAsTextFile("gs://covid-dicoms/drmV")
drmU.t.checkpoint().rdd.saveAsTextFile("gs://covid-dicoms/drmU")
sc.parallelize(s.toArray,1).saveAsTextFile("gs://covid-dicoms/s")
println("The job is done!")
}
}
// $SPARK_HOME/bin/spark-submit --driver-memory 4G --executor-memory 4G --class org.rawkintrevo.covid.App covid-0.1-jar-with-dependencies.jar <k> <p> <q>
================================================
FILE: ch9/ctscans/download-dicom/Dockerfile
================================================
FROM gcr.io/google.com/cloudsdktool/cloud-sdk:latest
#
## install gsutil lightly
#RUN apt update \
# && apt install -y wget
#RUN wget https://storage.googleapis.com/pub/gsutil.tar.gz
#RUN tar xfz gsutil.tar.gz -C $HOME
#ENV PATH="${PATH}:$HOME/gsutil"
COPY ./run.sh /run.sh
================================================
FILE: ch9/ctscans/download-dicom/build-component.sh
================================================
#!/usr/bin/env bash
image_name=rawkintrevo/download-dicom # Specify the image name here
image_tag=0.0.0.4
full_image_name=${image_name}:${image_tag}
cd "$(dirname "$0")"
docker build -t "${full_image_name}" .
docker push "$full_image_name"
================================================
FILE: ch9/ctscans/download-dicom/run.sh
================================================
#!/usr/bin/env bash
set -e
# First arg: case number (leading zero required if < 10); defaults to "01"
if [ -z "${1}" ]
then
CASE="01"
else
CASE="${1}"
fi
echo "Downloading DICOMs"
# If not on GCP, we need to download this
gsutil cp gs://covid-dicoms/covid-dicoms.tar.gz /tmp/covid-dicoms.tar.gz
tar -xzf /tmp/covid-dicoms.tar.gz -C /tmp
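# Keep only the axial series of the selected case for the downstream steps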
mv "/tmp/case0${CASE}/axial" /data/dicom
================================================
FILE: ch9/ctscans/process-dicoms-into-vectors/Dockerfile
================================================
FROM pydicom/dicom:v3.6.5
# From https://github.com/HealthplusAI/python3-gdcm
RUN apt update && apt install -y python-vtk6 libvtk6-dev cmake-curses-gui swig python3-dev libpython3.7-dev
## checkinstall missing...
RUN ln -s /opt/conda/bin/* /usr/local/bin
RUN