master b00b44a88011 cached
147 files
501.5 KB
155.7k tokens
60 symbols
1 requests
Download .txt
Showing preview only (544K chars total). Download the full file or copy to clipboard to get everything.
Repository: intro-to-ml-with-kubeflow/intro-to-ml-with-kubeflow-examples
Branch: master
Commit: b00b44a88011
Files: 147
Total size: 501.5 KB

Directory structure:
gitextract_v2ir0_h2/

├── .circleci/
│   └── config.yml
├── .gitignore
├── .travis.yaml
├── LICENSE
├── README.md
├── autopep_stuff.sh
├── ch03/
│   ├── example_secret.yaml
│   ├── linux_install.sh
│   ├── mac_install.sh
│   └── minio.sh
├── ch04/
│   ├── code/
│   │   ├── ControlStructures.ipynb
│   │   ├── ControlStructures.py
│   │   ├── Lightweight Pipeline.ipynb
│   │   ├── Lightweight Pipeline.py
│   │   ├── RecommenderPipeline.ipynb
│   │   ├── RecommenderPipeline.py
│   │   └── download_components.sh
│   └── install/
│       ├── deployment.yaml
│       └── virtualservice.yaml
├── ch06/
│   ├── MLflow.ipynb
│   ├── MLflow.py
│   ├── Metadata.ipynb
│   ├── Metadata.py
│   ├── docker/
│   │   ├── Dockerfile
│   │   ├── build.sh
│   │   └── run.sh
│   └── install/
│       └── mlflowchart/
│           ├── .helmignore
│           ├── Chart.yaml
│           ├── templates/
│           │   ├── NOTES.txt
│           │   ├── _helpers.tpl
│           │   └── mlflow.yaml
│           └── values.yaml
├── ch10/
│   ├── experiment.yaml
│   ├── hptuning.py
│   └── random.yaml
├── ch2/
│   ├── Dockerfile
│   ├── build-and-push.sh
│   └── query-endpoint.py
├── ch2_seldon_examples/
│   ├── pipeline_role.yaml
│   ├── pipeline_rolebinding.yaml
│   ├── pv-claim.yaml
│   ├── pv-volume.yaml
│   ├── request_example.ipynb
│   ├── run_example.sh
│   ├── setup_example.sh
│   ├── tf_mnist_no_seldon_pipeline.py
│   ├── tiller_rbac.yaml
│   └── train_pipeline.py
├── ch9/
│   └── ctscans/
│       ├── DICOM Denoising Pipeline.ipynb
│       ├── calculate-basis-vectors/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   ├── pom.xml
│       │   └── src/
│       │       └── main/
│       │           └── scala/
│       │               └── org/
│       │                   └── rawkintrevo/
│       │                       └── covid/
│       │                           └── App.scala
│       ├── download-dicom/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   └── run.sh
│       ├── process-dicoms-into-vectors/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   ├── data/
│       │   │   └── s.150.csv
│       │   ├── process-dicoms-into-vectors.yaml
│       │   └── src/
│       │       └── program.py
│       └── visualize-basis-vectors/
│           ├── Dockerfile
│           ├── build-component.sh
│           └── src/
│               └── program.py
├── ci.sh
├── convert_notebooks.sh
├── data-extraction/
│   ├── README.md
│   ├── github_comments_query.bsql
│   ├── github_issues_query.bsql
│   ├── iot/
│   │   ├── basic.yaml
│   │   └── build.sh
│   ├── python-notebook/
│   │   ├── AddSpamassassinDockerfile
│   │   ├── MailingListDataPrep.ipynb
│   │   ├── MailingListDataPrep.py
│   │   └── RunNBDockerfile
│   ├── python-spark/
│   │   ├── Dockerfile
│   │   ├── LaunchSparkJobs.ipynb
│   │   ├── LaunchSparkJobs.py
│   │   ├── fake_job.py
│   │   └── requirements.txt
│   ├── python-spark-notebook/
│   │   ├── AddGCSDockerfile
│   │   ├── AddPython3.6Dockerfile
│   │   ├── Dockerfile
│   │   ├── SparkMailingListForKF.ipynb
│   │   ├── SparkMailingListForKF.py
│   │   ├── build.sh
│   │   ├── dr.yaml
│   │   ├── no-saprk-tls.yaml
│   │   ├── spark-driver-service.yaml
│   │   └── virt_service.yaml
│   ├── spark-hello-world/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── hello_world_pipeline.py
│   │   ├── lr_demo/
│   │   │   ├── .gitignore
│   │   │   ├── .travis.yml
│   │   │   ├── README.md
│   │   │   ├── build.sbt
│   │   │   ├── project/
│   │   │   │   ├── build.properties
│   │   │   │   └── plugins.sbt
│   │   │   ├── sample.csv
│   │   │   ├── sbt/
│   │   │   │   └── sbt
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   └── scala/
│   │   │       │       └── com/
│   │   │       │           └── introtomlwithkubeflow/
│   │   │       │               └── spark/
│   │   │       │                   └── demo/
│   │   │       │                       └── lr/
│   │   │       │                           ├── TrainingApp.scala
│   │   │       │                           └── TrainingPipeline.scala
│   │   │       └── test/
│   │   │           └── scala/
│   │   │               └── com/
│   │   │                   └── introtomlwithkubeflow/
│   │   │                       └── spark/
│   │   │                           └── demo/
│   │   │                               └── lr/
│   │   │                                   └── TrainingPipelineTest.scala
│   │   ├── setup.sh
│   │   ├── spark-pi-min.yaml
│   │   └── spark-pi.yaml
│   ├── stack_overflow_questions.bsql
│   └── tfx/
│       ├── TFDV.ipynb
│       ├── TFDV.py
│       ├── install_tfx.sh
│       ├── requirements.txt
│       └── run_on_dataflow_ex.py
├── dev-setup/
│   ├── install-argo.sh
│   ├── install-kf-pipeline-sdk.sh
│   ├── install-kf.sh
│   ├── install-kubectl.sh
│   ├── install-kustomize.sh
│   ├── install-microk8s.sh
│   └── jsonnet.sh
├── feature-prep/
│   ├── README.md
│   ├── spark/
│   │   ├── SparkMailingListFeaturePrep.ipynb
│   │   └── SparkMailingListFeaturePrep.py
│   └── tft/
│       ├── requirements.txt
│       └── transform.py
├── gcp-setup/
│   ├── cloudshell_scrip.sh
│   └── setup-gcp.sh
├── kfctl_gcp_iap.v1.0.1.yaml
├── pipelines/
│   ├── ControlStructures.ipynb
│   ├── Lightweight Pipeline.ipynb
│   ├── RecommenderPipeline.ipynb
│   └── download_components.sh
├── recommender/
│   ├── Dockerfile
│   ├── Recommender_Kubeflow.ipynb
│   ├── Recommender_Kubeflow.py
│   ├── docker/
│   │   ├── Dockerfile
│   │   └── build.sh
│   └── tfservingchart/
│       ├── .helmignore
│       ├── Chart.yaml
│       ├── templates/
│       │   ├── NOTES.txt
│       │   ├── _helpers.tpl
│       │   ├── minioaccess.yaml
│       │   ├── tfserving.yaml
│       │   └── tfserving1.yaml
│       └── values.yaml
├── runthrough.sh
└── scikitLearn/
    └── python/
        └── IncomePrediction.ipynb

================================================
FILE CONTENTS
================================================

================================================
FILE: .circleci/config.yml
================================================
version: 2

# Shared step definition: installs the apt packages the CI script needs
# (shellcheck, used by ci.sh to lint the repo's shell scripts).
apt-run:  &apt-install
  name: Install apt packages
  command: |
    sudo apt-get -qq update
    sudo apt-get install -y \
      shellcheck

jobs:
  build:
    # NOTE(review): "mermaid-starter" looks copied from another project's
    # config — confirm the intended working directory name for this repo.
    working_directory: ~/mermaid-starter
    docker:
      - image: circleci/python:3.6-jessie-node-browsers-legacy
    steps:
      - checkout
      - run: *apt-install
      # BUG FIX: `name:` and `command:` were indented at the same level as
      # `run:`, which YAML parses as sibling keys of the step mapping (leaving
      # `run:` empty, so the step did nothing). They must be nested under
      # `run:`, matching the anchored step above.
      - run:
          name: Run our basic shell CI
          command: ./ci.sh

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.idea
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec


# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# Emacs
*~

# Ignore kfctl's downloaded
kfctl*.t*z

================================================
FILE: .travis.yaml
================================================
# Travis CI build configuration: install shellcheck and run the repo's CI script.
# NOTE(review): Travis CI only picks up a file named ".travis.yml"; this file
# is ".travis.yaml", so it is most likely being ignored — confirm and rename.
language: generic
sudo: true
addons:
  apt:
    packages:
     - shellcheck
script:
  - ./ci.sh

================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
# intro-to-ml-with-kubeflow-examples
Examples for the Intro to ML with Kubeflow book


================================================
FILE: autopep_stuff.sh
================================================
#!/bin/bash
# autopep8 a bunch of things that we can
#
# BUG FIX: the error-code list must reach --select as a single
# comma-separated argument. The original used backslash continuations whose
# continuation lines began with whitespace; that whitespace survives the
# line join and splits the list into separate words, which autopep8 then
# treats as file paths. Build the list in a variable instead.
SELECT_CODES="E101,E202,E201,E203,E211,E221,E222,E223,E224,E225,E226,E227"
SELECT_CODES+=",E228,E231,E241,E242,E251,E252,E262,E271,E272,E273,E274,E301,E302,E303"
SELECT_CODES+=",E304,E305,E306,E501,E502,E711,E712,E713,E714,E721,E722,E731,W291,W293"
SELECT_CODES+=",W391,W601,W602,W603,W604,W690"
autopep8 -i -r ./ --select "$SELECT_CODES" -j 0 --exclude "*venv*"

# Then we use YAPF because it does a better job on long-lines
yapf -i -r ./ --exclude "*venv*"


================================================
FILE: ch03/example_secret.yaml
================================================
# Example Kubernetes Secret holding S3/MinIO-style access credentials,
# referenced elsewhere in the book as `minioaccess`.
# NOTE(review): values under `data:` must be base64-encoded; the x's here are
# plain placeholders. Either base64-encode real credentials before applying,
# or use `stringData:` to supply them in plain text.
apiVersion: v1
kind: Secret
metadata:
  name: minioaccess
  namespace: mynamespace
data:
  AWS_ACCESS_KEY_ID: xxxxxxxxxx
  AWS_SECRET_ACCESS_KEY: xxxxxxxxxxxxxxxxxxxxx


================================================
FILE: ch03/linux_install.sh
================================================
#!/bin/bash
# Install the MinIO client (mc) into ~/bin on Linux.
set -e

# Ensure the target directory exists; otherwise pushd fails and wget would
# download into whatever the current directory happens to be.
mkdir -p ~/bin

#tag::installMCLinux[]
pushd ~/bin
wget https://dl.min.io/client/mc/release/linux-amd64/mc
chmod a+x mc
#end::installMCLinux[]

# Return to the directory we started in (pushd above changed it).
popd


================================================
FILE: ch03/mac_install.sh
================================================
#!/bin/bash
# Install the MinIO client (mc) on macOS via Homebrew.
#tag::installMCMac[]
# BUG FIX: this previously installed `minio/stable/minio` — the MinIO
# *server* — but the tag name and the companion scripts (linux_install.sh,
# minio.sh) need the `mc` client.
brew install minio/stable/mc
#end::installMCMac[]


================================================
FILE: ch03/minio.sh
================================================
#!/bin/bash
# Expose Kubeflow's bundled MinIO service locally, configure the `mc` client
# against it, and create a bucket for the book's examples.
set -ex

# Minio runs on port 9000 (both UI and service) so expose locally to use cli or UI
#tag::fwdMinio[]
kubectl port-forward -n kubeflow svc/minio-service 9000:9000 &
#end::fwdMinio[]
# NOTE(review): the backgrounded port-forward keeps running after this script
# exits and its PID is not tracked — it has to be killed manually.

# Give it a spell to settle
sleep 10

# Kubeflow creates a minio user with password minio123 at install
#tag::configMC[]
mc config host add minio http://localhost:9000 minio minio123
#end::configMC[]

#tag::listMC[]
mc ls minio
#end::listMC[]
# Output [2018-12-13 18:23:41 CST]     0B mlpipeline/

# Make a new bucket for our work
#tag::makeBucket[]
mc mb minio/kf-book-examples
#end::makeBucket[]


================================================
FILE: ch04/code/ControlStructures.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple Control structure\n",
    "\n",
    "Shows how to use conditional execution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already up-to-date: kfp in ./.local/lib/python3.6/site-packages (0.2.2.1)\n",
      "Requirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from kfp) (2.8.1)\n",
      "Requirement already satisfied, skipping upgrade: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: argo-models==2.2.1a in ./.local/lib/python3.6/site-packages (from kfp) (2.2.1a0)\n",
      "Requirement already satisfied, skipping upgrade: urllib3<1.25,>=1.15 in ./.local/lib/python3.6/site-packages (from kfp) (1.24.3)\n",
      "Requirement already satisfied, skipping upgrade: cloudpickle==1.1.1 in ./.local/lib/python3.6/site-packages (from kfp) (1.1.1)\n",
      "Requirement already satisfied, skipping upgrade: kubernetes<=10.0.0,>=8.0.0 in ./.local/lib/python3.6/site-packages (from kfp) (10.0.0)\n",
      "Requirement already satisfied, skipping upgrade: click==7.0 in ./.local/lib/python3.6/site-packages (from kfp) (7.0)\n",
      "Requirement already satisfied, skipping upgrade: certifi in /usr/local/lib/python3.6/dist-packages (from kfp) (2019.11.28)\n",
      "Requirement already satisfied, skipping upgrade: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
      "Requirement already satisfied, skipping upgrade: kfp-server-api<=0.1.40,>=0.1.18 in ./.local/lib/python3.6/site-packages (from kfp) (0.1.40)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.10 in /usr/lib/python3/dist-packages (from kfp) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: PyJWT>=1.6.4 in ./.local/lib/python3.6/site-packages (from kfp) (1.7.1)\n",
      "Requirement already satisfied, skipping upgrade: Deprecated in ./.local/lib/python3.6/site-packages (from kfp) (1.2.7)\n",
      "Requirement already satisfied, skipping upgrade: requests-toolbelt>=0.8.0 in ./.local/lib/python3.6/site-packages (from kfp) (0.9.1)\n",
      "Requirement already satisfied, skipping upgrade: cryptography>=2.4.2 in ./.local/lib/python3.6/site-packages (from kfp) (2.8)\n",
      "Requirement already satisfied, skipping upgrade: tabulate==0.8.3 in ./.local/lib/python3.6/site-packages (from kfp) (0.8.3)\n",
      "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
      "Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
      "Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
      "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (45.1.0)\n",
      "Requirement already satisfied, skipping upgrade: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
      "Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
      "Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
      "Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (1.3.0)\n",
      "Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (2.22.0)\n",
      "Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (0.57.0)\n",
      "Requirement already satisfied, skipping upgrade: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
      "Requirement already satisfied, skipping upgrade: cffi!=1.11.3,>=1.8 in ./.local/lib/python3.6/site-packages (from cryptography>=2.4.2->kfp) (1.14.0)\n",
      "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
      "Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.6.1->kfp) (0.4.8)\n",
      "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<=10.0.0,>=8.0.0->kfp) (3.1.0)\n",
      "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes<=10.0.0,>=8.0.0->kfp) (3.0.4)\n",
      "Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes<=10.0.0,>=8.0.0->kfp) (2.6)\n",
      "Requirement already satisfied, skipping upgrade: pycparser in ./.local/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography>=2.4.2->kfp) (2.19)\n",
      "Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n",
      "Requirement already satisfied, skipping upgrade: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
      "Requirement already satisfied, skipping upgrade: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install kfp --upgrade --user\n",
    "\n",
    "import kfp\n",
    "from kfp import dsl\n",
    "from kfp.components import func_to_container_op, InputPath, OutputPath"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "@func_to_container_op\n",
    "def get_random_int_op(minimum: int, maximum: int) -> int:\n",
    "    \"\"\"Generate a random number between minimum and maximum (inclusive).\"\"\"\n",
    "    import random\n",
    "    result = random.randint(minimum, maximum)\n",
    "    print(result)\n",
    "    return result\n",
    "\n",
    "@func_to_container_op\n",
    "def process_small_op(data: int):\n",
    "    \"\"\"Process small numbers.\"\"\"\n",
    "    print(\"Processing small result\", data)\n",
    "    return\n",
    "\n",
    "@func_to_container_op\n",
    "def process_medium_op(data: int):\n",
    "    \"\"\"Process medium numbers.\"\"\"\n",
    "    print(\"Processing medium result\", data)\n",
    "    return\n",
    "\n",
    "@func_to_container_op\n",
    "def process_large_op(data: int):\n",
    "    \"\"\"Process large numbers.\"\"\"\n",
    "    print(\"Processing large result\", data)\n",
    "    return"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Conditional pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dsl.pipeline(\n",
    "    name='Conditional execution pipeline',\n",
    "    description='Shows how to use dsl.Condition().'\n",
    ")\n",
    "def conditional_pipeline():\n",
    "    number = get_random_int_op(0, 100).output\n",
    "    with dsl.Condition(number < 10):\n",
    "        process_small_op(number)\n",
    "    with dsl.Condition(number > 10 and number < 50):\n",
    "        process_medium_op(number)\n",
    "    with dsl.Condition(number > 50):\n",
    "        process_large_op(number)\n",
    "        "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Submit the pipeline for execution:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Experiment link <a href=\"/pipeline/#/experiments/details/2abe16d1-fa2e-4f49-a3a5-acad8d36790d\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run link <a href=\"/pipeline/#/runs/details/293a92c5-50b2-4a96-bbd4-ebc85106f337\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "RunPipelineResult(run_id=293a92c5-50b2-4a96-bbd4-ebc85106f337)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kfp.Client().create_run_from_pipeline_func(conditional_pipeline, arguments={})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: ch04/code/ControlStructures.py
================================================
#!/usr/bin/env python
# coding: utf-8

# # Simple Control structure
#
# Shows how to use conditional execution

# In[1]:

# Make sure a current KFP SDK is installed in the notebook environment
# before importing it below.
get_ipython().system('pip install kfp --upgrade --user')

import kfp
from kfp import dsl
from kfp.components import func_to_container_op, InputPath, OutputPath

# # Functions

# In[2]:


@func_to_container_op
def get_random_int_op(minimum: int, maximum: int) -> int:
    """Draw a random integer in [minimum, maximum], print it, and return it."""
    import random
    value = random.randint(minimum, maximum)
    print(value)
    return value


@func_to_container_op
def process_small_op(data: int):
    """Handle a value that fell into the "small" bucket."""
    print("Processing small result", data)


@func_to_container_op
def process_medium_op(data: int):
    """Handle a value that fell into the "medium" bucket."""
    print("Processing medium result", data)


@func_to_container_op
def process_large_op(data: int):
    """Handle a value that fell into the "large" bucket."""
    print("Processing large result", data)


# # Conditional pipeline

# In[3]:


@dsl.pipeline(name='Conditional execution pipeline',
              description='Shows how to use dsl.Condition().')
def conditional_pipeline():
    """Route a random integer in [0, 100] to exactly one processing op.

    Small (< 10), medium (10..50 inclusive), and large (> 50) values each
    trigger a different downstream component.
    """
    number = get_random_int_op(0, 100).output
    with dsl.Condition(number < 10):
        process_small_op(number)
    # BUG FIX: `number > 10 and number < 50` cannot express a compound KFP
    # condition — Python's `and` evaluates the truthiness of the first
    # comparison at pipeline-definition time, so only one comparison ever
    # reached the compiled pipeline.  A logical AND is expressed by nesting
    # two dsl.Condition blocks.  The bounds are also made inclusive so the
    # boundary values 10 and 50 (reachable, since randint is inclusive) are
    # no longer dropped by every branch.
    with dsl.Condition(number >= 10):
        with dsl.Condition(number <= 50):
            process_medium_op(number)
    with dsl.Condition(number > 50):
        process_large_op(number)


# # Submit the pipeline for execution:

# In[4]:

# Submit a one-off run of the pipeline; it takes no pipeline arguments.
kfp.Client().create_run_from_pipeline_func(conditional_pipeline, arguments={})

# In[ ]:


================================================
FILE: ch04/code/Lightweight Pipeline.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already up-to-date: kfp in ./.local/lib/python3.6/site-packages (0.2.2.1)\n",
      "Requirement already satisfied, skipping upgrade: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
      "Requirement already satisfied, skipping upgrade: requests-toolbelt>=0.8.0 in ./.local/lib/python3.6/site-packages (from kfp) (0.9.1)\n",
      "Requirement already satisfied, skipping upgrade: click==7.0 in ./.local/lib/python3.6/site-packages (from kfp) (7.0)\n",
      "Requirement already satisfied, skipping upgrade: Deprecated in ./.local/lib/python3.6/site-packages (from kfp) (1.2.7)\n",
      "Requirement already satisfied, skipping upgrade: kubernetes<=10.0.0,>=8.0.0 in ./.local/lib/python3.6/site-packages (from kfp) (10.0.0)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.10 in /usr/lib/python3/dist-packages (from kfp) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: certifi in /usr/local/lib/python3.6/dist-packages (from kfp) (2019.11.28)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
      "Requirement already satisfied, skipping upgrade: PyJWT>=1.6.4 in ./.local/lib/python3.6/site-packages (from kfp) (1.7.1)\n",
      "Requirement already satisfied, skipping upgrade: cryptography>=2.4.2 in ./.local/lib/python3.6/site-packages (from kfp) (2.8)\n",
      "Requirement already satisfied, skipping upgrade: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: cloudpickle==1.1.1 in ./.local/lib/python3.6/site-packages (from kfp) (1.1.1)\n",
      "Requirement already satisfied, skipping upgrade: kfp-server-api<=0.1.40,>=0.1.18 in ./.local/lib/python3.6/site-packages (from kfp) (0.1.40)\n",
      "Requirement already satisfied, skipping upgrade: argo-models==2.2.1a in ./.local/lib/python3.6/site-packages (from kfp) (2.2.1a0)\n",
      "Requirement already satisfied, skipping upgrade: tabulate==0.8.3 in ./.local/lib/python3.6/site-packages (from kfp) (0.8.3)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from kfp) (2.8.1)\n",
      "Requirement already satisfied, skipping upgrade: urllib3<1.25,>=1.15 in ./.local/lib/python3.6/site-packages (from kfp) (1.24.3)\n",
      "Requirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
      "Requirement already satisfied, skipping upgrade: requests<3.0.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from requests-toolbelt>=0.8.0->kfp) (2.22.0)\n",
      "Requirement already satisfied, skipping upgrade: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
      "Requirement already satisfied, skipping upgrade: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (45.1.0)\n",
      "Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (0.57.0)\n",
      "Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (1.3.0)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
      "Requirement already satisfied, skipping upgrade: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
      "Requirement already satisfied, skipping upgrade: cffi!=1.11.3,>=1.8 in ./.local/lib/python3.6/site-packages (from cryptography>=2.4.2->kfp) (1.14.0)\n",
      "Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
      "Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
      "Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
      "Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
      "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
      "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (3.0.4)\n",
      "Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (2.6)\n",
      "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<=10.0.0,>=8.0.0->kfp) (3.1.0)\n",
      "Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
      "Requirement already satisfied, skipping upgrade: pycparser in ./.local/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography>=2.4.2->kfp) (2.19)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.6.1->kfp) (0.4.8)\n",
      "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
      "Requirement already satisfied, skipping upgrade: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n",
      "Requirement already satisfied, skipping upgrade: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
      "Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n"
     ]
    }
   ],
   "source": [
    "!pip install kfp --upgrade --user\n",
    "\n",
    "import kfp \n",
    "from kfp import compiler\n",
    "import kfp.dsl as dsl\n",
    "import kfp.notebook\n",
    "import kfp.components as comp\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Simple function that just adds two numbers:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Define a Python function\n",
    "def add(a: float, b: float) -> float:\n",
    "   '''Calculates sum of two arguments'''\n",
    "   return a + b"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert the function to a pipeline operation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "add_op = comp.func_to_container_op(add)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A bit more advanced function which demonstrates how to use imports, helper functions and produce multiple outputs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import NamedTuple\n",
    "def my_divmod(dividend: float, divisor:float) -> NamedTuple('MyDivmodOutput', [('quotient', float), ('remainder', float)]):\n",
    "    '''Divides two numbers and calculate  the quotient and remainder'''\n",
    "    #Imports inside a component function:\n",
    "    import numpy as np\n",
    "\n",
    "    #This function demonstrates how to use nested functions inside a component function:\n",
    "    def divmod_helper(dividend, divisor):\n",
    "        return np.divmod(dividend, divisor)\n",
    "\n",
    "    (quotient, remainder) = divmod_helper(dividend, divisor)\n",
    "\n",
    "    from collections import namedtuple\n",
    "    divmod_output = namedtuple('MyDivmodOutput', ['quotient', 'remainder'])\n",
    "    return divmod_output(quotient, remainder)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Test running the python function directly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MyDivmodOutput(quotient=14, remainder=2)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_divmod(100, 7)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert the function to a pipeline operation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "divmod_op = comp.func_to_container_op(my_divmod, base_image='tensorflow/tensorflow:1.14.0-py3')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define the pipeline\n",
    "Pipeline function has to be decorated with the @dsl.pipeline decorator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dsl.pipeline(\n",
    "   name='Calculation pipeline',\n",
    "   description='A toy pipeline that performs arithmetic calculations.'\n",
    ")\n",
    "def calc_pipeline(\n",
    "   a='a',\n",
    "   b='7',\n",
    "   c='17',\n",
    "):\n",
    "    #Passing pipeline parameter and a constant value as operation arguments\n",
    "    add_task = add_op(a, 4) #Returns a dsl.ContainerOp class instance. \n",
    "    \n",
    "    #Passing a task output reference as operation arguments\n",
    "    #For an operation with a single return value, the output reference can be accessed using `task.output` or `task.outputs['output_name']` syntax\n",
    "    divmod_task = divmod_op(add_task.output, b)\n",
    "\n",
    "    #For an operation with a multiple return values, the output references can be accessed using `task.outputs['output_name']` syntax\n",
    "    result_task = add_op(divmod_task.outputs['quotient'], c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Submit the pipeline for execution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Experiment link <a href=\"/pipeline/#/experiments/details/2abe16d1-fa2e-4f49-a3a5-acad8d36790d\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run link <a href=\"/pipeline/#/runs/details/87276776-0c3a-4d4e-99d0-4563b7f42fa5\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "RunPipelineResult(run_id=87276776-0c3a-4d4e-99d0-4563b7f42fa5)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client = kfp.Client()\n",
    "\n",
    "#Specify pipeline argument values\n",
    "arguments = {'a': '7', 'b': '8'}\n",
    "\n",
    "#Submit a pipeline run\n",
    "client.create_run_from_pipeline_func(calc_pipeline, arguments=arguments)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: ch04/code/Lightweight Pipeline.py
================================================
#!/usr/bin/env python
# coding: utf-8

# # Setup

# In[1]:

# Make sure a current KFP SDK is installed in the notebook environment
# before importing it below.
get_ipython().system('pip install kfp --upgrade --user')

import kfp
from kfp import compiler
import kfp.dsl as dsl
import kfp.notebook
import kfp.components as comp

# Simple function that just add two numbers:

# In[2]:


#Define a Python function
def add(a: float, b: float) -> float:
    """Return the sum of the two arguments."""
    total = a + b
    return total


# Convert the function to a pipeline operation

# In[3]:

# Wrap the plain `add` function as a reusable pipeline component.
add_op = comp.func_to_container_op(add)

# A bit more advanced function which demonstrates how to use imports, helper functions and produce multiple outputs.

# In[4]:

from typing import NamedTuple


def my_divmod(
    dividend: float, divisor: float
) -> NamedTuple('MyDivmodOutput', [('quotient', float), ('remainder', float)]):
    """Divide two numbers and return the quotient and remainder.

    Imports are placed inside the body so the function stays self-contained
    when it is shipped as a standalone pipeline component.
    """
    import numpy as np
    from collections import namedtuple

    quotient, remainder = np.divmod(dividend, divisor)

    MyDivmodOutput = namedtuple('MyDivmodOutput', ['quotient', 'remainder'])
    return MyDivmodOutput(quotient, remainder)


# Test running the python function directly

# In[5]:

# Sanity-check the plain Python function before wrapping it as a component.
my_divmod(100, 7)

# Convert the function to a pipeline operation

# In[6]:

# Build the component on a TensorFlow base image — presumably chosen because
# it bundles numpy, which my_divmod imports at run time (TODO confirm).
divmod_op = comp.func_to_container_op(
    my_divmod, base_image='tensorflow/tensorflow:1.14.0-py3')

# Define the pipeline
# Pipeline function has to be decorated with the @dsl.pipeline decorator

# In[7]:


@dsl.pipeline(
    name='Calculation pipeline',
    description='A toy pipeline that performs arithmetic calculations.')
def calc_pipeline(
    a='a',
    b='7',
    c='17',
):
    """Chain add -> divmod -> add using the lightweight components above."""
    # NOTE(review): the default for `a` is the literal string 'a', which is
    # not numeric — callers appear expected to override it (the sample run
    # passes a='7').  Kept as-is to preserve the pipeline's interface.
    sum_task = add_op(a, 4)

    # A single-output op exposes its result as `.output`
    # (or equivalently `.outputs['output_name']`).
    quotient_task = divmod_op(sum_task.output, b)

    # A multi-output op's results are addressed by name via `.outputs[...]`.
    final_task = add_op(quotient_task.outputs['quotient'], c)


# Submit the pipeline for execution

# In[8]:

# Connect to the KFP API server (default endpoint resolution).
client = kfp.Client()

# Pipeline argument values: `a` overrides the non-numeric default,
# `b` overrides '7', and `c` keeps its default of '17'.
arguments = {'a': '7', 'b': '8'}

# Submit a single run of calc_pipeline with the arguments above.
client.create_run_from_pipeline_func(calc_pipeline, arguments=arguments)

# In[ ]:


================================================
FILE: ch04/code/RecommenderPipeline.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Kubeflow pipeline\n",
    "This is a fairly simple pipeline, containing sequential steps:\n",
    "\n",
    "1. Update data - implemented by lightbend/recommender-data-update-publisher:0.2 image\n",
    "2. Run model training. Ideally we would run TFJob, but due to the current limitations for pipelines, we will directly use an image implementing training lightbend/ml-tf-recommender:0.1\n",
    "3. Update serving model - implemented by lightbend/recommender-model-publisher:0.2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already up-to-date: kubernetes in ./.local/lib/python3.6/site-packages (10.0.1)\n",
      "Requirement already satisfied, skipping upgrade: pyyaml>=3.12 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (5.3)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.9.0 in /usr/lib/python3/dist-packages (from kubernetes) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: urllib3>=1.24.2 in ./.local/lib/python3.6/site-packages (from kubernetes) (1.24.3)\n",
      "Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes) (1.3.0)\n",
      "Requirement already satisfied, skipping upgrade: certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (2019.11.28)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (2.8.1)\n",
      "Requirement already satisfied, skipping upgrade: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (45.1.0)\n",
      "Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes) (2.22.0)\n",
      "Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (0.57.0)\n",
      "Requirement already satisfied, skipping upgrade: google-auth>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from kubernetes) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes) (3.1.0)\n",
      "Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes) (2.6)\n",
      "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes) (3.0.4)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes) (0.2.8)\n",
      "Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes) (4.0.0)\n",
      "Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes) (4.0)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes) (0.4.8)\n",
      "Requirement already up-to-date: kfp in ./.local/lib/python3.6/site-packages (0.2.2.1)\n",
      "Requirement already satisfied, skipping upgrade: PyJWT>=1.6.4 in ./.local/lib/python3.6/site-packages (from kfp) (1.7.1)\n",
      "Requirement already satisfied, skipping upgrade: requests-toolbelt>=0.8.0 in ./.local/lib/python3.6/site-packages (from kfp) (0.9.1)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from kfp) (2.8.1)\n",
      "Requirement already satisfied, skipping upgrade: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
      "Requirement already satisfied, skipping upgrade: kfp-server-api<=0.1.40,>=0.1.18 in ./.local/lib/python3.6/site-packages (from kfp) (0.1.40)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
      "Requirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
      "Requirement already satisfied, skipping upgrade: Deprecated in ./.local/lib/python3.6/site-packages (from kfp) (1.2.7)\n",
      "Requirement already satisfied, skipping upgrade: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
      "Collecting kubernetes<=10.0.0,>=8.0.0\n",
      "  Using cached kubernetes-10.0.0-py2.py3-none-any.whl (1.5 MB)\n",
      "Requirement already satisfied, skipping upgrade: argo-models==2.2.1a in ./.local/lib/python3.6/site-packages (from kfp) (2.2.1a0)\n",
      "Requirement already satisfied, skipping upgrade: urllib3<1.25,>=1.15 in ./.local/lib/python3.6/site-packages (from kfp) (1.24.3)\n",
      "Requirement already satisfied, skipping upgrade: certifi in /usr/local/lib/python3.6/dist-packages (from kfp) (2019.11.28)\n",
      "Requirement already satisfied, skipping upgrade: tabulate==0.8.3 in ./.local/lib/python3.6/site-packages (from kfp) (0.8.3)\n",
      "Requirement already satisfied, skipping upgrade: click==7.0 in ./.local/lib/python3.6/site-packages (from kfp) (7.0)\n",
      "Requirement already satisfied, skipping upgrade: cloudpickle==1.1.1 in ./.local/lib/python3.6/site-packages (from kfp) (1.1.1)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.10 in /usr/lib/python3/dist-packages (from kfp) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: cryptography>=2.4.2 in ./.local/lib/python3.6/site-packages (from kfp) (2.8)\n",
      "Requirement already satisfied, skipping upgrade: requests<3.0.0,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from requests-toolbelt>=0.8.0->kfp) (2.22.0)\n",
      "Requirement already satisfied, skipping upgrade: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
      "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (45.1.0)\n",
      "Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
      "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
      "Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
      "Requirement already satisfied, skipping upgrade: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
      "Requirement already satisfied, skipping upgrade: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
      "Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
      "Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (0.57.0)\n",
      "Requirement already satisfied, skipping upgrade: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<=10.0.0,>=8.0.0->kfp) (1.3.0)\n",
      "Requirement already satisfied, skipping upgrade: cffi!=1.11.3,>=1.8 in ./.local/lib/python3.6/site-packages (from cryptography>=2.4.2->kfp) (1.14.0)\n",
      "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (3.0.4)\n",
      "Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=0.8.0->kfp) (2.6)\n",
      "Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
      "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.6.1->kfp) (0.4.8)\n",
      "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<=10.0.0,>=8.0.0->kfp) (3.1.0)\n",
      "Requirement already satisfied, skipping upgrade: pycparser in ./.local/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography>=2.4.2->kfp) (2.19)\n",
      "Requirement already satisfied, skipping upgrade: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n",
      "Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n",
      "Requirement already satisfied, skipping upgrade: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
      "Installing collected packages: kubernetes\n",
      "  Attempting uninstall: kubernetes\n",
      "    Found existing installation: kubernetes 10.0.1\n",
      "    Uninstalling kubernetes-10.0.1:\n",
      "      Successfully uninstalled kubernetes-10.0.1\n",
      "Successfully installed kubernetes-10.0.0\n"
     ]
    }
   ],
   "source": [
    "!pip install kubernetes --upgrade --user\n",
    "!pip install kfp --upgrade --user\n",
    "\n",
    "\n",
    "import kfp  # the Pipelines SDK.  This library is included with the notebook image.\n",
    "from kfp import compiler\n",
    "import kfp.dsl as dsl\n",
    "import kfp.notebook\n",
    "from kubernetes import client as k8s_client"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create/Get an Experiment in the Kubeflow Pipeline System\n",
    "The Kubeflow Pipeline system requires an \"Experiment\" to group pipeline runs. You can create a new experiment, or call client.list_experiments() to get existing ones."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = kfp.Client()\n",
    "client.list_experiments()\n",
    "#exp = client.create_experiment(name='mdupdate')\n",
    "exp = client.get_experiment(experiment_name ='mdupdate')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define a Pipeline\n",
    "Authoring a pipeline is like authoring a normal Python function. The pipeline function describes the topology of the pipeline.\n",
    "\n",
    "Each step in the pipeline is typically a ContainerOp --- a simple class or function describing how to interact with a docker container image. In the pipeline, all the container images referenced in the pipeline are already built."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dsl.pipeline(\n",
    "  name='Recommender model update',\n",
    "  description='Demonstrate usage of pipelines for multi-step model update'\n",
    ")\n",
    "def recommender_pipeline():\n",
    "    # Load new data\n",
    "  data = dsl.ContainerOp(\n",
    "      name='updatedata',\n",
    "      image='lightbend/recommender-data-update-publisher:0.2') \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL',value='http://minio-service.kubeflow.svc.cluster.local:9000')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123'))\n",
    "    # Train the model\n",
    "  train = dsl.ContainerOp(\n",
    "      name='trainmodel',\n",
    "      image='lightbend/ml-tf-recommender:0.1') \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL',value='minio-service.kubeflow.svc.cluster.local:9000')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123'))\n",
    "  train.after(data)\n",
    "    # Publish the new model\n",
    "  publish = dsl.ContainerOp(\n",
    "      name='publishmodel',\n",
    "      image='lightbend/recommender-model-publisher:0.2') \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL',value='http://minio-service.kubeflow.svc.cluster.local:9000')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='KAFKA_BROKERS', value='cloudflow-kafka-brokers.cloudflow.svc.cluster.local:9092')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='DEFAULT_RECOMMENDER_URL', value='http://recommendermodelserver.kubeflow.svc.cluster.local:8501')) \\\n",
    "    .add_env_variable(k8s_client.V1EnvVar(name='ALTERNATIVE_RECOMMENDER_URL', value='http://recommendermodelserver1.kubeflow.svc.cluster.local:8501'))\n",
    "  publish.after(train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compile pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "compiler.Compiler().compile(recommender_pipeline, 'pipeline.tar.gz')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Submit an experiment run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Run link <a href=\"/pipeline/#/runs/details/df24284c-c7a1-480e-91b6-398bd352f164\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "run = client.run_pipeline(exp.id, 'pipeline1', 'pipeline.tar.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: ch04/code/RecommenderPipeline.py
================================================
#!/usr/bin/env python
# coding: utf-8

# # Kubeflow pipeline
# This is a fairly simple pipeline, containing sequential steps:
#
# 1. Update data - implemented by lightbend/recommender-data-update-publisher:0.2 image
# 2. Run model training. Ideally we would run TFJob, but due to the current limitations for pipelines, we will directly use an image implementing training lightbend/ml-tf-recommender:0.1
# 3. Update serving model - implemented by lightbend/recommender-model-publisher:0.2

# # Setup

# In[1]:

# Upgrade the kubernetes client and the KFP SDK in the notebook environment
# before importing them below.
get_ipython().system('pip install kubernetes --upgrade --user')
get_ipython().system('pip install kfp --upgrade --user')

# the Pipelines SDK.  This library is included with the notebook image.
import kfp
from kfp import compiler
import kfp.dsl as dsl
import kfp.notebook
from kubernetes import client as k8s_client

# # Create/Get an Experiment in the Kubeflow Pipeline System
# The Kubeflow Pipeline system requires an "Experiment" to group pipeline runs. You can create a new experiment, or call client.list_experiments() to get existing ones.

# In[3]:

client = kfp.Client()
client.list_experiments()
#exp = client.create_experiment(name='mdupdate')
exp = client.get_experiment(experiment_name='mdupdate')

# # Define a Pipeline
# Authoring a pipeline is like authoring a normal Python function. The pipeline function describes the topology of the pipeline.
#
# Each step in the pipeline is typically a ContainerOp --- a simple class or function describing how to interact with a docker container image. In the pipeline, all the container images referenced in the pipeline are already built.

# In[4]:


@dsl.pipeline(
    name='Recommender model update',
    description='Demonstrate usage of pipelines for multi-step model update')
def recommender_pipeline():
    """Three sequential steps: refresh the data, retrain, publish the model.

    Each step is a plain ContainerOp wrapping a prebuilt image; ordering is
    enforced with .after() since there are no data dependencies between ops.
    """
    def add_minio_env(op, minio_url):
        # Attach the shared Minio connection settings to a pipeline step.
        # NOTE(review): credentials are hard-coded demo values; move them to a
        # Kubernetes secret for anything beyond local experimentation.
        return op \
            .add_env_variable(k8s_client.V1EnvVar(name='MINIO_URL', value=minio_url)) \
            .add_env_variable(k8s_client.V1EnvVar(name='MINIO_KEY', value='minio')) \
            .add_env_variable(k8s_client.V1EnvVar(name='MINIO_SECRET', value='minio123'))

    # Load new data
    data = add_minio_env(
        dsl.ContainerOp(
            name='updatedata',
            image='lightbend/recommender-data-update-publisher:0.2'),
        'http://minio-service.kubeflow.svc.cluster.local:9000')

    # Train the model.
    # NOTE(review): this MINIO_URL lacks the http:// scheme, unlike the other
    # two steps — confirm the training image expects a bare host:port before
    # normalizing.
    train = add_minio_env(
        dsl.ContainerOp(
            name='trainmodel',
            image='lightbend/ml-tf-recommender:0.1'),
        'minio-service.kubeflow.svc.cluster.local:9000')
    train.after(data)

    # Publish the new model to the serving layer.
    publish = add_minio_env(
        dsl.ContainerOp(
            name='publishmodel',
            image='lightbend/recommender-model-publisher:0.2'),
        'http://minio-service.kubeflow.svc.cluster.local:9000') \
      .add_env_variable(k8s_client.V1EnvVar(name='KAFKA_BROKERS', value='cloudflow-kafka-brokers.cloudflow.svc.cluster.local:9092')) \
      .add_env_variable(k8s_client.V1EnvVar(name='DEFAULT_RECOMMENDER_URL', value='http://recommendermodelserver.kubeflow.svc.cluster.local:8501')) \
      .add_env_variable(k8s_client.V1EnvVar(name='ALTERNATIVE_RECOMMENDER_URL', value='http://recommendermodelserver1.kubeflow.svc.cluster.local:8501'))
    publish.after(train)


# # Compile pipeline

# In[5]:

# Compile the pipeline function into an Argo workflow archive that the
# Pipelines API can execute.
compiler.Compiler().compile(recommender_pipeline, 'pipeline.tar.gz')

# # Submit an experiment run

# In[6]:

# Submit the compiled package as a run named 'pipeline1' under the
# 'mdupdate' experiment fetched above.
run = client.run_pipeline(exp.id, 'pipeline1', 'pipeline.tar.gz')

# In[ ]:


================================================
FILE: ch04/code/download_components.sh
================================================
#!/bin/bash
# Download and unpack the Kubeflow Pipelines 0.2.5 source release
# (the book references reusable components shipped inside it).
# Fail fast: without this, a failed download would still be passed to tar.
set -euo pipefail
#tag::dlPipelineRelease[]
wget https://github.com/kubeflow/pipelines/archive/0.2.5.tar.gz
tar -xvf 0.2.5.tar.gz
#end::dlPipelineRelease[]


================================================
FILE: ch04/install/deployment.yaml
================================================
# Deployment for the Argo UI that backs the Kubeflow pipelines dashboard.
# apps/v1 replaces extensions/v1beta1, which was deprecated and removed in
# Kubernetes 1.16; the required spec.selector is already present below.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: argo-ui
    app.kubernetes.io/component: argo
    app.kubernetes.io/instance: argo-v2.3.0
    app.kubernetes.io/managed-by: kfctl
    app.kubernetes.io/name: argo
    app.kubernetes.io/part-of: kubeflow
    app.kubernetes.io/version: v2.3.0
    kustomize.component: argo
  name: argo-ui
  namespace: kubeflow
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: argo-ui
      app.kubernetes.io/component: argo
      app.kubernetes.io/instance: argo-v2.3.0
      app.kubernetes.io/managed-by: kfctl
      app.kubernetes.io/name: argo
      app.kubernetes.io/part-of: kubeflow
      app.kubernetes.io/version: v2.3.0
      kustomize.component: argo
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      annotations:
        # The UI pod does not need an Istio sidecar; traffic reaches it via
        # the VirtualService routing to its Service.
        sidecar.istio.io/inject: "false"
      creationTimestamp: null
      labels:
        app: argo-ui
        app.kubernetes.io/component: argo
        app.kubernetes.io/instance: argo-v2.3.0
        app.kubernetes.io/managed-by: kfctl
        app.kubernetes.io/name: argo
        app.kubernetes.io/part-of: kubeflow
        app.kubernetes.io/version: v2.3.0
        kustomize.component: argo
    spec:
      containers:
        - env:
            - name: ARGO_NAMESPACE
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.namespace
            - name: IN_CLUSTER
              value: "true"
            - name: ENABLE_WEB_CONSOLE
              value: "true"
            - name: BASE_HREF
              value: /
          image: argoproj/argoui:v2.3.0
          imagePullPolicy: IfNotPresent
          name: argo-ui
          ports:
            - containerPort: 8001
              name: ui
              protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /
              port: 8001
              scheme: HTTP
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          resources: {}
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: argo-ui
      serviceAccountName: argo-ui
      terminationGracePeriodSeconds: 30

================================================
FILE: ch04/install/virtualservice.yaml
================================================
# Istio VirtualService exposing the Argo UI through the shared
# kubeflow-gateway: requests under /argo/ are rewritten to / and routed to
# the argo-ui Service on port 80.
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: argo-ui
  namespace: kubeflow
spec:
  gateways:
    - kubeflow-gateway
  hosts:
    # Match any host header; path prefix alone selects this route.
    - '*'
  http:
    - match:
        - uri:
            prefix: /argo/
      # Strip the /argo/ prefix so the UI sees root-relative paths
      # (the Deployment sets BASE_HREF=/ to match).
      rewrite:
        uri: /
      route:
        - destination:
            host: argo-ui.kubeflow.svc.cluster.local
            port:
              number: 80

================================================
FILE: ch06/MLflow.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# mlflow-energyforecast\n",
    "\n",
    "This is a showcase for ML Flow capabilities, based on the article\n",
    "http://the-odd-dataguy.com/be-more-efficient-to-produce-ml-models-with-mlflow\n",
    "and a github https://github.com/jeanmidevacc/mlflow-energyforecast\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting pandas\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/12/d1/a6502c2f5c15b50f5dd579fc1c52b47edf6f2e9f682aed917dd7565b3e60/pandas-1.0.0-cp36-cp36m-manylinux1_x86_64.whl (10.1MB)\n",
      "\u001b[K     |████████████████████████████████| 10.1MB 3.2MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied, skipping upgrade: numpy>=1.13.3 in ./.local/lib/python3.6/site-packages (from pandas) (1.18.1)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.0)\n",
      "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2019.2)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.6.1->pandas) (1.11.0)\n",
      "Installing collected packages: pandas\n",
      "  Found existing installation: pandas 0.25.3\n",
      "    Uninstalling pandas-0.25.3:\n",
      "      Successfully uninstalled pandas-0.25.3\n",
      "Successfully installed pandas-1.0.0\n",
      "\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
      "Collecting mlflow\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/65/33/5fe1559f7eb95e1fa2077df747ada7fd225045bad4e76bcdb53605e4b937/mlflow-1.6.0.tar.gz (15.9MB)\n",
      "\u001b[K     |████████████████████████████████| 15.9MB 3.0MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied, skipping upgrade: alembic in ./.local/lib/python3.6/site-packages (from mlflow) (1.3.2)\n",
      "Requirement already satisfied, skipping upgrade: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow) (7.0)\n",
      "Requirement already satisfied, skipping upgrade: cloudpickle in ./.local/lib/python3.6/site-packages (from mlflow) (1.1.1)\n",
      "Requirement already satisfied, skipping upgrade: databricks-cli>=0.8.7 in ./.local/lib/python3.6/site-packages (from mlflow) (0.9.1)\n",
      "Requirement already satisfied, skipping upgrade: requests>=2.17.3 in /usr/local/lib/python3.6/dist-packages (from mlflow) (2.22.0)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.10.0 in /usr/lib/python3/dist-packages (from mlflow) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: Flask in ./.local/lib/python3.6/site-packages (from mlflow) (1.1.1)\n",
      "Requirement already satisfied, skipping upgrade: numpy in ./.local/lib/python3.6/site-packages (from mlflow) (1.18.1)\n",
      "Requirement already satisfied, skipping upgrade: pandas in ./.local/lib/python3.6/site-packages (from mlflow) (1.0.0)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow) (2.8.0)\n",
      "Requirement already satisfied, skipping upgrade: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow) (3.8.0)\n",
      "Requirement already satisfied, skipping upgrade: gitpython>=2.1.0 in ./.local/lib/python3.6/site-packages (from mlflow) (3.0.5)\n",
      "Requirement already satisfied, skipping upgrade: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow) (5.1.2)\n",
      "Requirement already satisfied, skipping upgrade: querystring_parser in ./.local/lib/python3.6/site-packages (from mlflow) (1.2.4)\n",
      "Requirement already satisfied, skipping upgrade: simplejson in ./.local/lib/python3.6/site-packages (from mlflow) (3.17.0)\n",
      "Requirement already satisfied, skipping upgrade: docker>=4.0.0 in /usr/local/lib/python3.6/dist-packages (from mlflow) (4.0.2)\n",
      "Requirement already satisfied, skipping upgrade: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow) (0.3)\n",
      "Requirement already satisfied, skipping upgrade: sqlparse in ./.local/lib/python3.6/site-packages (from mlflow) (0.3.0)\n",
      "Requirement already satisfied, skipping upgrade: sqlalchemy in ./.local/lib/python3.6/site-packages (from mlflow) (1.3.12)\n",
      "Requirement already satisfied, skipping upgrade: gorilla in ./.local/lib/python3.6/site-packages (from mlflow) (0.3.0)\n",
      "Requirement already satisfied, skipping upgrade: prometheus-flask-exporter in ./.local/lib/python3.6/site-packages (from mlflow) (0.12.1)\n",
      "Requirement already satisfied, skipping upgrade: gunicorn in ./.local/lib/python3.6/site-packages (from mlflow) (20.0.4)\n",
      "Requirement already satisfied, skipping upgrade: Mako in ./.local/lib/python3.6/site-packages (from alembic->mlflow) (1.1.0)\n",
      "Requirement already satisfied, skipping upgrade: python-editor>=0.3 in ./.local/lib/python3.6/site-packages (from alembic->mlflow) (1.0.4)\n",
      "Requirement already satisfied, skipping upgrade: configparser>=0.3.5 in ./.local/lib/python3.6/site-packages (from databricks-cli>=0.8.7->mlflow) (4.0.2)\n",
      "Requirement already satisfied, skipping upgrade: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.7->mlflow) (0.8.3)\n",
      "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.17.3->mlflow) (3.0.4)\n",
      "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.17.3->mlflow) (2019.9.11)\n",
      "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.17.3->mlflow) (1.24.3)\n",
      "Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests>=2.17.3->mlflow) (2.6)\n",
      "Requirement already satisfied, skipping upgrade: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from Flask->mlflow) (2.10.1)\n",
      "Requirement already satisfied, skipping upgrade: itsdangerous>=0.24 in ./.local/lib/python3.6/site-packages (from Flask->mlflow) (1.1.0)\n",
      "Requirement already satisfied, skipping upgrade: Werkzeug>=0.15 in /usr/local/lib/python3.6/dist-packages (from Flask->mlflow) (0.15.4)\n",
      "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->mlflow) (2019.2)\n",
      "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.0->mlflow) (41.0.1)\n",
      "Requirement already satisfied, skipping upgrade: gitdb2>=2.0.0 in ./.local/lib/python3.6/site-packages (from gitpython>=2.1.0->mlflow) (2.0.6)\n",
      "Requirement already satisfied, skipping upgrade: websocket-client>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from docker>=4.0.0->mlflow) (0.56.0)\n",
      "Requirement already satisfied, skipping upgrade: prometheus-client in /usr/local/lib/python3.6/dist-packages (from prometheus-flask-exporter->mlflow) (0.7.1)\n",
      "Requirement already satisfied, skipping upgrade: MarkupSafe>=0.9.2 in /usr/local/lib/python3.6/dist-packages (from Mako->alembic->mlflow) (1.1.1)\n",
      "Requirement already satisfied, skipping upgrade: smmap2>=2.0.0 in ./.local/lib/python3.6/site-packages (from gitdb2>=2.0.0->gitpython>=2.1.0->mlflow) (2.0.5)\n",
      "Building wheels for collected packages: mlflow\n",
      "  Building wheel for mlflow (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/46/4e/83/e58b14b6d2d494783e31690de9572c5777882f675f480374b6\n",
      "Successfully built mlflow\n",
      "Installing collected packages: mlflow\n",
      "  Found existing installation: mlflow 1.5.0\n",
      "    Uninstalling mlflow-1.5.0:\n",
      "      Successfully uninstalled mlflow-1.5.0\n",
      "\u001b[33m  WARNING: The script mlflow is installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
      "Successfully installed mlflow-1.6.0\n",
      "\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
      "Requirement already up-to-date: joblib in ./.local/lib/python3.6/site-packages (0.14.1)\n",
      "\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
      "Requirement already up-to-date: numpy in ./.local/lib/python3.6/site-packages (1.18.1)\n",
      "\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
      "Requirement already up-to-date: scipy in ./.local/lib/python3.6/site-packages (1.4.1)\n",
      "Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in ./.local/lib/python3.6/site-packages (from scipy) (1.18.1)\n",
      "\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
      "Requirement already up-to-date: scikit-learn in ./.local/lib/python3.6/site-packages (0.22.1)\n",
      "Requirement already satisfied, skipping upgrade: numpy>=1.11.0 in ./.local/lib/python3.6/site-packages (from scikit-learn) (1.18.1)\n",
      "Requirement already satisfied, skipping upgrade: scipy>=0.17.0 in ./.local/lib/python3.6/site-packages (from scikit-learn) (1.4.1)\n",
      "Requirement already satisfied, skipping upgrade: joblib>=0.11 in ./.local/lib/python3.6/site-packages (from scikit-learn) (0.14.1)\n",
      "\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
      "Collecting boto3\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/d5/57/e9675a5a8d0ee586594ff19cb9a601334fbf24fa2fb29052d2a900ee5d23/boto3-1.11.9-py2.py3-none-any.whl (128kB)\n",
      "\u001b[K     |████████████████████████████████| 133kB 3.5MB/s eta 0:00:01\n",
      "\u001b[?25hCollecting botocore<1.15.0,>=1.14.9 (from boto3)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/64/4c/b0b0d3b6f84a05f9135051b56d3eb8708012a289c4b82ee21c8c766f47b5/botocore-1.14.9-py2.py3-none-any.whl (5.9MB)\n",
      "\u001b[K     |████████████████████████████████| 5.9MB 11.6MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied, skipping upgrade: jmespath<1.0.0,>=0.7.1 in ./.local/lib/python3.6/site-packages (from boto3) (0.9.4)\n",
      "Requirement already satisfied, skipping upgrade: s3transfer<0.4.0,>=0.3.0 in ./.local/lib/python3.6/site-packages (from boto3) (0.3.0)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.6/dist-packages (from botocore<1.15.0,>=1.14.9->boto3) (2.8.0)\n",
      "Requirement already satisfied, skipping upgrade: docutils<0.16,>=0.10 in ./.local/lib/python3.6/site-packages (from botocore<1.15.0,>=1.14.9->boto3) (0.15.2)\n",
      "Requirement already satisfied, skipping upgrade: urllib3<1.26,>=1.20 in /usr/local/lib/python3.6/dist-packages (from botocore<1.15.0,>=1.14.9->boto3) (1.24.3)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.15.0,>=1.14.9->boto3) (1.11.0)\n",
      "Installing collected packages: botocore, boto3\n",
      "  Found existing installation: botocore 1.14.4\n",
      "    Uninstalling botocore-1.14.4:\n",
      "      Successfully uninstalled botocore-1.14.4\n",
      "  Found existing installation: boto3 1.11.4\n",
      "    Uninstalling boto3-1.11.4:\n",
      "      Successfully uninstalled boto3-1.11.4\n",
      "Successfully installed boto3-1.11.9 botocore-1.14.9\n",
      "\u001b[33mWARNING: You are using pip version 19.1.1, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!pip install pandas --upgrade --user\n",
    "!pip install mlflow --upgrade --user\n",
    "!pip install joblib --upgrade --user\n",
    "!pip install numpy --upgrade --user \n",
    "!pip install scipy --upgrade --user \n",
    "!pip install scikit-learn --upgrade --user\n",
    "!pip install boto3 --upgrade --user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import json\n",
    "import os\n",
    "from joblib import Parallel, delayed\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy\n",
    "\n",
    "from sklearn.model_selection import train_test_split, KFold\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score\n",
    "from sklearn.exceptions import ConvergenceWarning\n",
    "\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from  mlflow.tracking import MlflowClient\n",
    "\n",
    "from warnings import simplefilter\n",
    "simplefilter(action='ignore', category = FutureWarning)\n",
    "simplefilter(action='ignore', category = ConvergenceWarning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ensure Minio access\n",
    "os.environ['MLFLOW_S3_ENDPOINT_URL'] = 'http://minio-service.kubeflow.svc.cluster.local:9000'\n",
    "os.environ['AWS_ACCESS_KEY_ID'] = 'minio'\n",
    "os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio123'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the data \n",
    "df_nationalconsumption_electricity_daily = pd.read_csv(\"https://raw.githubusercontent.com/jeanmidevacc/mlflow-energyforecast/master/data/rtu_data.csv\")\n",
    "df_nationalconsumption_electricity_daily.set_index([\"day\"], inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of the training set :  1081\n",
      "Size of the testing set :  233\n"
     ]
    }
   ],
   "source": [
    "# Prepare the training set and the testing set\n",
    "df_trainvalidate_energyconsumption = df_nationalconsumption_electricity_daily[df_nationalconsumption_electricity_daily[\"datastatus\"] == \"Définitif\"]\n",
    "del df_trainvalidate_energyconsumption[\"datastatus\"]\n",
    "\n",
    "df_test_energyconsumption = df_nationalconsumption_electricity_daily[df_nationalconsumption_electricity_daily[\"datastatus\"] == \"Consolidé\"]\n",
    "del df_test_energyconsumption[\"datastatus\"]\n",
    "\n",
    "print(\"Size of the training set : \",len(df_trainvalidate_energyconsumption))\n",
    "print(\"Size of the testing set : \",len(df_test_energyconsumption))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Output to predict :  dailyconsumption\n",
      "Inputs for the prediction :  ['weekday', 'week', 'month', 'year', 'avg_min_temperature', 'avg_max_temperature', 'avg_mean_temperature', 'wavg_min_temperature', 'wavg_max_temperature', 'wavg_mean_temperature', 'is_holiday']\n"
     ]
    }
   ],
   "source": [
    "# Define the inputs and the output\n",
    "output = \"dailyconsumption\"\n",
    "allinputs = list(df_trainvalidate_energyconsumption.columns)\n",
    "allinputs.remove(output)\n",
    "\n",
    "print(\"Output to predict : \", output)\n",
    "print(\"Inputs for the prediction : \", allinputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build different sets of features for the model\n",
    "possible_inputs = {\n",
    "    \"all\" : allinputs,\n",
    "    \"only_allday_inputs\" : [\"weekday\", \"month\", \"is_holiday\", \"week\"],\n",
    "    \"only_allweatheravg_inputs\" : [\"avg_min_temperature\", \"avg_max_temperature\", \"avg_mean_temperature\",\"wavg_min_temperature\", \"wavg_max_temperature\", \"wavg_mean_temperature\"],\n",
    "    \"only_meanweather_inputs_avg\" : [\"avg_mean_temperature\"],\n",
    "    \"only_meanweather_inputs_wavg\" : [\"wavg_mean_temperature\"],\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare the output of the model\n",
    "array_output_train = np.array(df_trainvalidate_energyconsumption[output])\n",
    "array_output_test = np.array(df_test_energyconsumption[output])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# connect to remote server\n",
    "remote_server_uri = \"http://mlflowserver.kubeflow.svc.cluster.local:5000\"\n",
    "mlflow.set_tracking_uri(remote_server_uri)\n",
    "# Launch the experiment on mlflow\n",
    "experiment_name = \"electricityconsumption-forecast\"\n",
    "mlflow.set_experiment(experiment_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the evaluation function that will do the computation of the different metrics of accuracy (RMSE,MAE,R2)\n",
    "def evaluation_model(y_test, y_pred):\n",
    "\n",
    "    rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
    "    mae = mean_absolute_error(y_test, y_pred)\n",
    "    r2 = r2_score(y_test, y_pred)\n",
    "\n",
    "    metrics = {\n",
    "        \"rmse\" : rmse,\n",
    "        \"r2\" : r2,\n",
    "        \"mae\" : mae,\n",
    "    }\n",
    "    \n",
    "    return metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# KNN regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "\n",
    "def train_knnmodel(parameters, inputs, tags, log = False):\n",
    "    with mlflow.start_run(nested = True):\n",
    "        \n",
    "        # Prepare the data\n",
    "        array_inputs_train = np.array(df_trainvalidate_energyconsumption[inputs])\n",
    "        array_inputs_test = np.array(df_test_energyconsumption[inputs])\n",
    "        \n",
    "        \n",
    "        # Build the model\n",
    "        tic = time.time()\n",
    "        model = KNeighborsRegressor(parameters[\"nbr_neighbors\"], weights = parameters[\"weight_method\"])\n",
    "        model.fit(array_inputs_train, array_output_train)\n",
    "        duration_training = time.time() - tic\n",
    "\n",
    "        # Make the prediction\n",
    "        tic1 = time.time()\n",
    "        prediction = model.predict(array_inputs_test)\n",
    "        duration_prediction = time.time() - tic1\n",
    "\n",
    "        # Evaluate the model prediction\n",
    "        metrics = evaluation_model(array_output_test, prediction)\n",
    "\n",
    "        # Log in the console\n",
    "        if log:\n",
    "            print(f\"KNN regressor:\")\n",
    "            print(parameters)\n",
    "            print(metrics)\n",
    "\n",
    "        # Log in mlflow (parameter)\n",
    "        mlflow.log_params(parameters)\n",
    "\n",
    "        # Log in mlflow (metrics)\n",
    "        metrics[\"duration_training\"] = duration_training\n",
    "        metrics[\"duration_prediction\"] = duration_prediction\n",
    "        mlflow.log_metrics(metrics)\n",
    "\n",
    "        # log in mlflow (model)\n",
    "        mlflow.sklearn.log_model(model, f\"model\")\n",
    "                \n",
    "        # Tag the model\n",
    "        mlflow.set_tags(tags)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test the different combinations\n",
    "configurations = []\n",
    "for nbr_neighbors in [1,2,5,10]:\n",
    "    for weight_method in ['uniform','distance']:\n",
    "        for field in possible_inputs:\n",
    "            parameters = {\n",
    "                \"nbr_neighbors\" : nbr_neighbors,\n",
    "                \"weight_method\" : weight_method\n",
    "            }\n",
    "\n",
    "            tags = {\n",
    "                \"model\" : \"knn\",\n",
    "                \"inputs\" : field\n",
    "            }\n",
    "            \n",
    "            configurations.append([parameters, tags])\n",
    "\n",
    "            train_knnmodel(parameters, possible_inputs[field], tags)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MLP regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neural_network import MLPRegressor\n",
    "\n",
    "def train_mlpmodel(parameters, inputs, tags, log = False):\n",
    "    with mlflow.start_run(nested = True):\n",
    "        \n",
    "        # Prepare the data\n",
    "        array_inputs_train = np.array(df_trainvalidate_energyconsumption[inputs])\n",
    "        array_inputs_test = np.array(df_test_energyconsumption[inputs])\n",
    "        \n",
    "        # Build the model\n",
    "        tic = time.time()\n",
    "\n",
    "        model = MLPRegressor(\n",
    "            hidden_layer_sizes = parameters[\"hidden_layers\"],\n",
    "            activation = parameters[\"activation\"],\n",
    "            solver = parameters[\"solver\"],\n",
    "            max_iter = parameters[\"nbr_iteration\"],\n",
    "            random_state = 0)\n",
    "        \n",
    "        model.fit(array_inputs_train, array_output_train)\n",
    "        duration_training = time.time() - tic\n",
    "\n",
    "        # Make the prediction\n",
    "        tic1 = time.time()\n",
    "        prediction = model.predict(array_inputs_test)\n",
    "        duration_prediction = time.time() - tic1\n",
    "\n",
    "        # Evaluate the model prediction\n",
    "        metrics = evaluation_model(array_output_test, prediction)\n",
    "\n",
    "        # Log in the console\n",
    "        if log:\n",
    "            print(f\"Random forest regressor:\")\n",
    "            print(parameters)\n",
    "            print(metrics)\n",
    "    \n",
    "        # Log in mlflow (parameter)\n",
    "        mlflow.log_params(parameters)\n",
    "\n",
    "        # Log in mlflow (metrics)\n",
    "        metrics[\"duration_training\"] = duration_training\n",
    "        metrics[\"duration_prediction\"] = duration_prediction\n",
    "        mlflow.log_metrics(metrics)\n",
    "\n",
    "        # log in mlflow (model)\n",
    "        mlflow.sklearn.log_model(model, f\"model\")\n",
    "        \n",
    "        # Tag the model\n",
    "        mlflow.set_tags(tags)        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "for hiddenlayers in [4,8,16]:\n",
    "    for activation in [\"identity\",\"logistic\",]:\n",
    "        for solver in [\"lbfgs\"]:\n",
    "            for nbriteration in [10,100,1000]:\n",
    "                for field in possible_inputs:\n",
    "                    parameters = {\n",
    "                        \"hidden_layers\" : hiddenlayers,\n",
    "                        \"activation\" : activation,\n",
    "                        \"solver\" : solver,\n",
    "                        \"nbr_iteration\" : nbriteration\n",
    "                    }\n",
    "\n",
    "                    tags = {\n",
    "                        \"model\" : \"mlp\",\n",
    "                        \"inputs\" : field\n",
    "                    }\n",
    "\n",
    "                    train_mlpmodel(parameters, possible_inputs[field], tags)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use a handmade model (scipy approach)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "class PTG:\n",
    "    def __init__(self, thresholds_x0, thresholds_a, thresholds_b):\n",
    "        self.thresholds_x0 = thresholds_x0\n",
    "        self.thresholds_a = thresholds_a\n",
    "        self.thresholds_b = thresholds_b\n",
    "        \n",
    "    def get_ptgmodel(self, x, a, b, x0):\n",
    "        return np.piecewise(x, [x < x0, x >= x0], [lambda x: a*x + b , lambda x : a*x0 + b])\n",
    "        \n",
    "    def fit(self, dfx, y):\n",
    "        x = np.array(dfx)\n",
    "        \n",
    "        # Define the bounds\n",
    "        bounds_min = [self.thresholds_a[0], self.thresholds_b[0], self.thresholds_x0[0]]\n",
    "        bounds_max = [self.thresholds_a[1], self.thresholds_b[1], self.thresholds_x0[1]]\n",
    "        bounds = (bounds_min, bounds_max)\n",
    "\n",
    "        # Fit a model\n",
    "        popt, pcov = scipy.optimize.curve_fit(self.get_ptgmodel, x, y, bounds = bounds)\n",
    "\n",
    "        # Get the parameter of the model\n",
    "        a = popt[0]\n",
    "        b = popt[1]\n",
    "        x0 = popt[2]\n",
    "        \n",
    "        self.coefficients = [a, b, x0]\n",
    "        \n",
    "    def predict(self,dfx):\n",
    "        x = np.array(dfx)\n",
    "        predictions = []\n",
    "        for elt in x:\n",
    "            forecast = self.get_ptgmodel(elt, self.coefficients[0], self.coefficients[1], self.coefficients[2])\n",
    "            predictions.append(forecast)\n",
    "        return np.array(predictions)\n",
    "        \n",
    "def train_ptgmodel(parameters, inputs, tags, log = False):\n",
    "    with mlflow.start_run(nested = True):\n",
    "        \n",
    "        # Prepare the data\n",
    "        df_inputs_train = df_trainvalidate_energyconsumption[inputs[0]]\n",
    "        df_inputs_test = df_test_energyconsumption[inputs[0]]\n",
    "        \n",
    "        \n",
    "        # Build the model\n",
    "        tic = time.time()\n",
    "        \n",
    "        model = PTG(parameters[\"thresholds_x0\"], parameters[\"thresholds_a\"], parameters[\"thresholds_b\"])\n",
    "        \n",
    "        model.fit(df_inputs_train, array_output_train)\n",
    "        duration_training = time.time() - tic\n",
    "\n",
    "        # Make the prediction\n",
    "        tic1 = time.time()\n",
    "        prediction = model.predict(df_inputs_test)\n",
    "        duration_prediction = time.time() - tic1\n",
    "\n",
    "        # Evaluate the model prediction\n",
    "        metrics = evaluation_model(array_output_test, prediction)\n",
    "\n",
    "        # Log in the console\n",
    "        if log:\n",
    "            print(f\"PTG:\")\n",
    "            print(parameters)\n",
    "            print(metrics)\n",
    "    \n",
    "        # Log in mlflow (parameter)\n",
    "        mlflow.log_params(parameters)  \n",
    "\n",
    "        # Log in mlflow (metrics)\n",
    "        metrics[\"duration_training\"] = duration_training\n",
    "        metrics[\"duration_prediction\"] = duration_prediction\n",
    "        mlflow.log_metrics(metrics)\n",
    "\n",
    "        # log in mlflow (model)\n",
    "        mlflow.sklearn.log_model(model, f\"model\")\n",
    "        \n",
    "        # Tag the model\n",
    "        mlflow.set_tags(tags)           "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the parameters of the model\n",
    "thresholds_x0 = [0, 20]\n",
    "thresholds_a = [-200000, -50000]\n",
    "thresholds_b = [1000000, 3000000]\n",
    "\n",
    "parameters = {\n",
    "    \"thresholds_x0\" : thresholds_x0,\n",
    "    \"thresholds_a\" : thresholds_a,\n",
    "    \"thresholds_b\" : thresholds_b\n",
    "}\n",
    "\n",
    "for field in [\"only_meanweather_inputs_avg\", \"only_meanweather_inputs_wavg\"]:\n",
    "    \n",
    "    tags = {\n",
    "        \"model\" : \"ptg\",\n",
    "        \"inputs\" : field\n",
    "    }\n",
    "    \n",
    "    train_ptgmodel(parameters, possible_inputs[field], tags, log = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluate mlflow results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of runs done :  272\n"
     ]
    }
   ],
   "source": [
    "# Select the run of the experiment\n",
    "df_runs = mlflow.search_runs(experiment_ids=\"0\")\n",
    "print(\"Number of runs done : \", len(df_runs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>run_id</th>\n",
       "      <th>experiment_id</th>\n",
       "      <th>status</th>\n",
       "      <th>artifact_uri</th>\n",
       "      <th>start_time</th>\n",
       "      <th>end_time</th>\n",
       "      <th>metrics.r2</th>\n",
       "      <th>metrics.mae</th>\n",
       "      <th>metrics.duration_prediction</th>\n",
       "      <th>metrics.rmse</th>\n",
       "      <th>...</th>\n",
       "      <th>params.activation</th>\n",
       "      <th>params.nbr_iteration</th>\n",
       "      <th>params.hidden_layers</th>\n",
       "      <th>params.nbr_neighbors</th>\n",
       "      <th>params.weight_method</th>\n",
       "      <th>tags.model</th>\n",
       "      <th>tags.mlflow.source.type</th>\n",
       "      <th>tags.inputs</th>\n",
       "      <th>tags.mlflow.user</th>\n",
       "      <th>tags.mlflow.source.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>238</th>\n",
       "      <td>50ee6409ad3a4778bb9d8cb59034df5d</td>\n",
       "      <td>0</td>\n",
       "      <td>FINISHED</td>\n",
       "      <td>s3://mlflow/mlflow/artifacts/0/50ee6409ad3a477...</td>\n",
       "      <td>2020-01-17 18:17:47.448000+00:00</td>\n",
       "      <td>2020-01-17 18:17:47.929000+00:00</td>\n",
       "      <td>0.935956</td>\n",
       "      <td>104040.339809</td>\n",
       "      <td>0.003205</td>\n",
       "      <td>134649.399348</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>5</td>\n",
       "      <td>distance</td>\n",
       "      <td>knn</td>\n",
       "      <td>LOCAL</td>\n",
       "      <td>all</td>\n",
       "      <td>jovyan</td>\n",
       "      <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>614bcf7042ca465c8d86296f12ac9c09</td>\n",
       "      <td>0</td>\n",
       "      <td>FINISHED</td>\n",
       "      <td>s3://mlflow/mlflow/artifacts/0/614bcf7042ca465...</td>\n",
       "      <td>2020-01-31 15:21:29.978000+00:00</td>\n",
       "      <td>2020-01-31 15:21:30.503000+00:00</td>\n",
       "      <td>0.935956</td>\n",
       "      <td>104040.339809</td>\n",
       "      <td>0.003404</td>\n",
       "      <td>134649.399348</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>5</td>\n",
       "      <td>distance</td>\n",
       "      <td>knn</td>\n",
       "      <td>LOCAL</td>\n",
       "      <td>all</td>\n",
       "      <td>jovyan</td>\n",
       "      <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>b05667486f7d45779d23519eb0dbe24f</td>\n",
       "      <td>0</td>\n",
       "      <td>FINISHED</td>\n",
       "      <td>s3://mlflow/mlflow/artifacts/0/b05667486f7d457...</td>\n",
       "      <td>2020-01-31 15:21:35.424000+00:00</td>\n",
       "      <td>2020-01-31 15:21:35.922000+00:00</td>\n",
       "      <td>0.935111</td>\n",
       "      <td>105833.358681</td>\n",
       "      <td>0.002732</td>\n",
       "      <td>135534.759873</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>10</td>\n",
       "      <td>distance</td>\n",
       "      <td>knn</td>\n",
       "      <td>LOCAL</td>\n",
       "      <td>all</td>\n",
       "      <td>jovyan</td>\n",
       "      <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228</th>\n",
       "      <td>d279d728946e4b74811203a842d79df3</td>\n",
       "      <td>0</td>\n",
       "      <td>FINISHED</td>\n",
       "      <td>s3://mlflow/mlflow/artifacts/0/d279d728946e4b7...</td>\n",
       "      <td>2020-01-17 18:17:52.555000+00:00</td>\n",
       "      <td>2020-01-17 18:17:53.029000+00:00</td>\n",
       "      <td>0.935111</td>\n",
       "      <td>105833.358681</td>\n",
       "      <td>0.002863</td>\n",
       "      <td>135534.759873</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>10</td>\n",
       "      <td>distance</td>\n",
       "      <td>knn</td>\n",
       "      <td>LOCAL</td>\n",
       "      <td>all</td>\n",
       "      <td>jovyan</td>\n",
       "      <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>88af21719e0a408b91448f7ddd27e84c</td>\n",
       "      <td>0</td>\n",
       "      <td>FINISHED</td>\n",
       "      <td>s3://mlflow/mlflow/artifacts/0/88af21719e0a408...</td>\n",
       "      <td>2020-01-31 15:21:27.338000+00:00</td>\n",
       "      <td>2020-01-31 15:21:27.947000+00:00</td>\n",
       "      <td>0.934465</td>\n",
       "      <td>105793.727897</td>\n",
       "      <td>0.002668</td>\n",
       "      <td>136207.422483</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>5</td>\n",
       "      <td>uniform</td>\n",
       "      <td>knn</td>\n",
       "      <td>LOCAL</td>\n",
       "      <td>all</td>\n",
       "      <td>jovyan</td>\n",
       "      <td>/usr/local/lib/python3.6/dist-packages/ipykern...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 25 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                               run_id experiment_id    status  \\\n",
       "238  50ee6409ad3a4778bb9d8cb59034df5d             0  FINISHED   \n",
       "106  614bcf7042ca465c8d86296f12ac9c09             0  FINISHED   \n",
       "96   b05667486f7d45779d23519eb0dbe24f             0  FINISHED   \n",
       "228  d279d728946e4b74811203a842d79df3             0  FINISHED   \n",
       "111  88af21719e0a408b91448f7ddd27e84c             0  FINISHED   \n",
       "\n",
       "                                          artifact_uri  \\\n",
       "238  s3://mlflow/mlflow/artifacts/0/50ee6409ad3a477...   \n",
       "106  s3://mlflow/mlflow/artifacts/0/614bcf7042ca465...   \n",
       "96   s3://mlflow/mlflow/artifacts/0/b05667486f7d457...   \n",
       "228  s3://mlflow/mlflow/artifacts/0/d279d728946e4b7...   \n",
       "111  s3://mlflow/mlflow/artifacts/0/88af21719e0a408...   \n",
       "\n",
       "                          start_time                         end_time  \\\n",
       "238 2020-01-17 18:17:47.448000+00:00 2020-01-17 18:17:47.929000+00:00   \n",
       "106 2020-01-31 15:21:29.978000+00:00 2020-01-31 15:21:30.503000+00:00   \n",
       "96  2020-01-31 15:21:35.424000+00:00 2020-01-31 15:21:35.922000+00:00   \n",
       "228 2020-01-17 18:17:52.555000+00:00 2020-01-17 18:17:53.029000+00:00   \n",
       "111 2020-01-31 15:21:27.338000+00:00 2020-01-31 15:21:27.947000+00:00   \n",
       "\n",
       "     metrics.r2    metrics.mae  metrics.duration_prediction   metrics.rmse  \\\n",
       "238    0.935956  104040.339809                     0.003205  134649.399348   \n",
       "106    0.935956  104040.339809                     0.003404  134649.399348   \n",
       "96     0.935111  105833.358681                     0.002732  135534.759873   \n",
       "228    0.935111  105833.358681                     0.002863  135534.759873   \n",
       "111    0.934465  105793.727897                     0.002668  136207.422483   \n",
       "\n",
       "     ...  params.activation params.nbr_iteration params.hidden_layers  \\\n",
       "238  ...               None                 None                 None   \n",
       "106  ...               None                 None                 None   \n",
       "96   ...               None                 None                 None   \n",
       "228  ...               None                 None                 None   \n",
       "111  ...               None                 None                 None   \n",
       "\n",
       "    params.nbr_neighbors params.weight_method tags.model  \\\n",
       "238                    5             distance        knn   \n",
       "106                    5             distance        knn   \n",
       "96                    10             distance        knn   \n",
       "228                   10             distance        knn   \n",
       "111                    5              uniform        knn   \n",
       "\n",
       "    tags.mlflow.source.type tags.inputs tags.mlflow.user  \\\n",
       "238                   LOCAL         all           jovyan   \n",
       "106                   LOCAL         all           jovyan   \n",
       "96                    LOCAL         all           jovyan   \n",
       "228                   LOCAL         all           jovyan   \n",
       "111                   LOCAL         all           jovyan   \n",
       "\n",
       "                               tags.mlflow.source.name  \n",
       "238  /usr/local/lib/python3.6/dist-packages/ipykern...  \n",
       "106  /usr/local/lib/python3.6/dist-packages/ipykern...  \n",
       "96   /usr/local/lib/python3.6/dist-packages/ipykern...  \n",
       "228  /usr/local/lib/python3.6/dist-packages/ipykern...  \n",
       "111  /usr/local/lib/python3.6/dist-packages/ipykern...  \n",
       "\n",
       "[5 rows x 25 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Quick sorting to get the best models based on the RMSE metric\n",
    "df_runs.sort_values([\"metrics.rmse\"], ascending = True, inplace = True)\n",
    "df_runs.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'50ee6409ad3a4778bb9d8cb59034df5d'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get the best one\n",
    "runid_selected = df_runs.head(1)[\"run_id\"].values[0]\n",
    "runid_selected"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "celltoolbar": "Raw Cell Format",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: ch06/MLflow.py
================================================
#!/usr/bin/env python
# coding: utf-8

# # mlflow-energyforecast
#
# This is a showcase for ML Flow capabilities, based on the article
# http://the-odd-dataguy.com/be-more-efficient-to-produce-ml-models-with-mlflow
# and a github https://github.com/jeanmidevacc/mlflow-energyforecast
#

# In[2]:

# Upgrade every library this notebook relies on; --user installs into the
# notebook user's home directory so no root access is required.
for _pip_cmd in [
        'pip install pandas --upgrade --user',
        'pip install mlflow --upgrade --user',
        'pip install joblib --upgrade --user',
        'pip install numpy --upgrade --user ',
        'pip install scipy --upgrade --user ',
        'pip install scikit-learn --upgrade --user',
        'pip install boto3 --upgrade --user',
]:
    get_ipython().system(_pip_cmd)

# In[3]:

import json
import os
import time
from warnings import simplefilter

from joblib import Parallel, delayed

import numpy as np
import pandas as pd
import scipy
import scipy.optimize

import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from sklearn.model_selection import train_test_split, KFold

simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=ConvergenceWarning)

# In[4]:

# Ensure Minio access: point MLflow's S3 artifact client at the in-cluster
# Minio service and supply the (demo) credentials boto3 reads from the
# environment.
os.environ.update({
    'MLFLOW_S3_ENDPOINT_URL':
    'http://minio-service.kubeflow.svc.cluster.local:9000',
    'AWS_ACCESS_KEY_ID': 'minio',
    'AWS_SECRET_ACCESS_KEY': 'minio123',
})

# # Data preparation

# In[5]:

# Collect the data: daily French national electricity consumption plus
# weather aggregates, from the article's companion repository.
df_nationalconsumption_electricity_daily = pd.read_csv(
    "https://raw.githubusercontent.com/jeanmidevacc/mlflow-energyforecast/master/data/rtu_data.csv"
)
# Index by day so the train/test frames below keep the date.
df_nationalconsumption_electricity_daily.set_index(["day"], inplace=True)

# In[6]:

# Prepare the training set and the testing set.
# "Définitif" rows are finalized measurements -> training/validation;
# "Consolidé" rows are still being consolidated -> test set.
# NOTE(review): the split relies on these exact French status labels —
# confirm they still match if the upstream CSV is ever refreshed.
df_trainvalidate_energyconsumption = df_nationalconsumption_electricity_daily[
    df_nationalconsumption_electricity_daily["datastatus"] == "Définitif"]
del df_trainvalidate_energyconsumption["datastatus"]

df_test_energyconsumption = df_nationalconsumption_electricity_daily[
    df_nationalconsumption_electricity_daily["datastatus"] == "Consolidé"]
del df_test_energyconsumption["datastatus"]

print("Size of the training set : ", len(df_trainvalidate_energyconsumption))
print("Size of the testing set : ", len(df_test_energyconsumption))

# In[7]:

# Define the inputs and the output
output = "dailyconsumption"  # target column to forecast
allinputs = list(df_trainvalidate_energyconsumption.columns)
allinputs.remove(output)  # every remaining column is a candidate feature

print("Output to predict : ", output)
print("Inputs for the prediction : ", allinputs)

# In[8]:

# Build different sets of features for the model.
# Each entry maps a scenario name -> the feature columns used for it, so
# every scenario can be trained and compared as a separate MLflow run.
possible_inputs = {
    "all":
    allinputs,
    "only_allday_inputs": ["weekday", "month", "is_holiday", "week"],
    "only_allweatheravg_inputs": [
        "avg_min_temperature", "avg_max_temperature", "avg_mean_temperature",
        "wavg_min_temperature", "wavg_max_temperature", "wavg_mean_temperature"
    ],
    "only_meanweather_inputs_avg": ["avg_mean_temperature"],
    "only_meanweather_inputs_wavg": ["wavg_mean_temperature"],
}

# In[9]:

# Prepare the output of the model: plain numpy arrays of the target,
# row-aligned with the train/test dataframes above.
array_output_train = np.array(df_trainvalidate_energyconsumption[output])
array_output_test = np.array(df_test_energyconsumption[output])

# In[10]:

# connect to remote server: the MLflow tracking service deployed in the
# cluster (see the mlflow Helm chart under ch06/install).
remote_server_uri = "http://mlflowserver.kubeflow.svc.cluster.local:5000"
mlflow.set_tracking_uri(remote_server_uri)
# Launch the experiment on mlflow; every run below is recorded under this
# named experiment on the tracking server.
experiment_name = "electricityconsumption-forecast"
mlflow.set_experiment(experiment_name)

# In[11]:


# Define the evaluation function that will do the computation of the different metrics of accuracy (RMSE,MAE,R2)
def evaluation_model(y_test, y_pred):
    """Score a set of predictions against the ground truth.

    Args:
        y_test: array-like of true target values.
        y_pred: array-like of predicted values, same length as y_test.

    Returns:
        dict with the regression metrics "rmse", "r2" and "mae".
    """
    return {
        "rmse": np.sqrt(mean_squared_error(y_test, y_pred)),
        "r2": r2_score(y_test, y_pred),
        "mae": mean_absolute_error(y_test, y_pred),
    }


# # KNN regressor

# In[12]:

from sklearn.neighbors import KNeighborsRegressor


def train_knnmodel(parameters, inputs, tags, log=False):
    """Train a KNN regressor and record the experiment in MLflow.

    Args:
        parameters: dict with "nbr_neighbors" and "weight_method",
            forwarded to sklearn's KNeighborsRegressor.
        inputs: list of feature column names to train on.
        tags: dict of MLflow tags describing this run.
        log: when True, also echo parameters/metrics to the console.

    Side effects: reads the module-level train/test dataframes and output
    arrays, and logs params, metrics, the fitted model and tags to the
    active MLflow tracking server.
    """
    # nested=True groups each model run under the surrounding experiment
    # run instead of replacing it.
    with mlflow.start_run(nested=True):

        # Prepare the data
        array_inputs_train = np.array(
            df_trainvalidate_energyconsumption[inputs])
        array_inputs_test = np.array(df_test_energyconsumption[inputs])

        # Build the model and time the fit
        tic = time.time()
        model = KNeighborsRegressor(parameters["nbr_neighbors"],
                                    weights=parameters["weight_method"])
        model.fit(array_inputs_train, array_output_train)
        duration_training = time.time() - tic

        # Make the prediction and time it
        tic1 = time.time()
        prediction = model.predict(array_inputs_test)
        duration_prediction = time.time() - tic1

        # Evaluate the model prediction
        metrics = evaluation_model(array_output_test, prediction)

        # Log in the console
        if log:
            print("KNN regressor:")  # plain string: no placeholders needed
            print(parameters)
            print(metrics)

        # Log in mlflow (parameters)
        mlflow.log_params(parameters)

        # Log in mlflow (metrics), including the timing measurements
        metrics["duration_training"] = duration_training
        metrics["duration_prediction"] = duration_prediction
        mlflow.log_metrics(metrics)

        # log in mlflow (model artifact)
        mlflow.sklearn.log_model(model, "model")

        # Tag the model
        mlflow.set_tags(tags)


# In[13]:

# Test the different combinations: grid search over neighbour count x
# weighting scheme x feature scenario; each combination becomes one
# MLflow run via train_knnmodel.
configurations = []
for nbr_neighbors in [1, 2, 5, 10]:
    for weight_method in ['uniform', 'distance']:
        for field in possible_inputs:
            parameters = {
                "nbr_neighbors": nbr_neighbors,
                "weight_method": weight_method
            }

            tags = {"model": "knn", "inputs": field}

            # NOTE(review): configurations is accumulated but never read
            # again in this script — kept only for interactive inspection?
            configurations.append([parameters, tags])

            train_knnmodel(parameters, possible_inputs[field], tags)

# # MLP regressor

# In[14]:

from sklearn.neural_network import MLPRegressor


def train_mlpmodel(parameters, inputs, tags, log=False):
    """Train an MLP regressor and record the experiment in MLflow.

    Args:
        parameters: dict with "hidden_layers", "activation", "solver" and
            "nbr_iteration", forwarded to sklearn's MLPRegressor.
        inputs: list of feature column names to train on.
        tags: dict of MLflow tags describing this run.
        log: when True, also echo parameters/metrics to the console.

    Side effects: reads the module-level train/test dataframes and output
    arrays, and logs params, metrics, the fitted model and tags to the
    active MLflow tracking server.
    """
    with mlflow.start_run(nested=True):

        # Prepare the data
        array_inputs_train = np.array(
            df_trainvalidate_energyconsumption[inputs])
        array_inputs_test = np.array(df_test_energyconsumption[inputs])

        # Build the model and time the fit; random_state is pinned for
        # reproducible weight initialisation.
        tic = time.time()

        model = MLPRegressor(hidden_layer_sizes=parameters["hidden_layers"],
                             activation=parameters["activation"],
                             solver=parameters["solver"],
                             max_iter=parameters["nbr_iteration"],
                             random_state=0)

        model.fit(array_inputs_train, array_output_train)
        duration_training = time.time() - tic

        # Make the prediction and time it
        tic1 = time.time()
        prediction = model.predict(array_inputs_test)
        duration_prediction = time.time() - tic1

        # Evaluate the model prediction
        metrics = evaluation_model(array_output_test, prediction)

        # Log in the console
        if log:
            # BUG FIX: previously printed "Random forest regressor:",
            # a copy-paste leftover from another trainer.
            print("MLP regressor:")
            print(parameters)
            print(metrics)

        # Log in mlflow (parameters)
        mlflow.log_params(parameters)

        # Log in mlflow (metrics), including the timing measurements
        metrics["duration_training"] = duration_training
        metrics["duration_prediction"] = duration_prediction
        mlflow.log_metrics(metrics)

        # log in mlflow (model artifact)
        mlflow.sklearn.log_model(model, "model")

        # Tag the model
        mlflow.set_tags(tags)


# In[15]:

# Grid search over the MLP hyper-parameters x feature scenario; each
# combination becomes one MLflow run via train_mlpmodel.
for hiddenlayers in [4, 8, 16]:
    for activation in [
            "identity",
            "logistic",
    ]:
        for solver in ["lbfgs"]:
            for nbriteration in [10, 100, 1000]:
                for field in possible_inputs:
                    parameters = {
                        "hidden_layers": hiddenlayers,
                        "activation": activation,
                        "solver": solver,
                        "nbr_iteration": nbriteration
                    }

                    tags = {"model": "mlp", "inputs": field}

                    train_mlpmodel(parameters, possible_inputs[field], tags)

# # Use a handmade model (scipy approach)

# In[16]:


class PTG:
    """Piecewise model: linear below a breakpoint x0 (a*x + b) and
    constant above it (a*x0 + b), fitted with bounded curve fitting.

    Each ``thresholds_*`` argument is a [min, max] pair giving the search
    bounds for the corresponding parameter.
    """

    def __init__(self, thresholds_x0, thresholds_a, thresholds_b):
        self.thresholds_x0 = thresholds_x0  # bounds for the breakpoint x0
        self.thresholds_a = thresholds_a  # bounds for the slope a
        self.thresholds_b = thresholds_b  # bounds for the intercept b

    def get_ptgmodel(self, x, a, b, x0):
        # Linear up to x0, flat afterwards.
        return np.piecewise(x, [x < x0, x >= x0],
                            [lambda x: a * x + b, lambda x: a * x0 + b])

    def fit(self, dfx, y):
        """Estimate (a, b, x0) from the data and store them on the model.

        Args:
            dfx: array-like of the single input feature.
            y: array-like of target values, same length as dfx.
        """
        x = np.array(dfx)

        # Define the bounds in curve_fit's parameter order (a, b, x0).
        # BUG FIX: these previously read module-level globals
        # (thresholds_a, thresholds_b, thresholds_x0) instead of the
        # values passed to __init__, silently ignoring the constructor
        # arguments.
        bounds_min = [
            self.thresholds_a[0], self.thresholds_b[0], self.thresholds_x0[0]
        ]
        bounds_max = [
            self.thresholds_a[1], self.thresholds_b[1], self.thresholds_x0[1]
        ]
        bounds = (bounds_min, bounds_max)

        # Fit a model
        popt, pcov = scipy.optimize.curve_fit(self.get_ptgmodel,
                                              x,
                                              y,
                                              bounds=bounds)

        # Keep the fitted parameters (a, b, x0) for predict().
        self.coefficients = [popt[0], popt[1], popt[2]]

    def predict(self, dfx):
        """Apply the fitted piecewise model element-wise to dfx."""
        x = np.array(dfx)
        predictions = []
        for elt in x:
            forecast = self.get_ptgmodel(elt, self.coefficients[0],
                                         self.coefficients[1],
                                         self.coefficients[2])
            predictions.append(forecast)
        return np.array(predictions)


def train_ptgmodel(parameters, inputs, tags, log=False):
    """Train the handmade PTG model and record the experiment in MLflow.

    Args:
        parameters: dict with "thresholds_x0", "thresholds_a" and
            "thresholds_b" — the [min, max] bounds passed to PTG.
        inputs: list of feature column names; only inputs[0] is used
            because PTG is a single-feature model.
        tags: dict of MLflow tags describing this run.
        log: when True, also echo parameters/metrics to the console.

    Side effects: reads the module-level train/test dataframes and output
    arrays, and logs params, metrics, the fitted model and tags to the
    active MLflow tracking server.
    """
    with mlflow.start_run(nested=True):

        # Prepare the data — PTG takes a single column, so select it.
        df_inputs_train = df_trainvalidate_energyconsumption[inputs[0]]
        df_inputs_test = df_test_energyconsumption[inputs[0]]

        # Build the model and time the fit
        tic = time.time()

        model = PTG(parameters["thresholds_x0"], parameters["thresholds_a"],
                    parameters["thresholds_b"])

        model.fit(df_inputs_train, array_output_train)
        duration_training = time.time() - tic

        # Make the prediction and time it
        tic1 = time.time()
        prediction = model.predict(df_inputs_test)
        duration_prediction = time.time() - tic1

        # Evaluate the model prediction
        metrics = evaluation_model(array_output_test, prediction)

        # Log in the console
        if log:
            print("PTG:")  # plain string: no placeholders needed
            print(parameters)
            print(metrics)

        # Log in mlflow (parameters)
        mlflow.log_params(parameters)

        # Log in mlflow (metrics), including the timing measurements
        metrics["duration_training"] = duration_training
        metrics["duration_prediction"] = duration_prediction
        mlflow.log_metrics(metrics)

        # log in mlflow (model artifact); sklearn flavour works because
        # PTG exposes the fit/predict interface.
        mlflow.sklearn.log_model(model, "model")

        # Tag the model
        mlflow.set_tags(tags)


# In[17]:

# Define the parameters of the model: [min, max] search bounds for the
# PTG breakpoint (x0), slope (a) and intercept (b). The magnitudes of a
# and b follow the scale of the consumption data.
thresholds_x0 = [0, 20]
thresholds_a = [-200000, -50000]
thresholds_b = [1000000, 3000000]

parameters = {
    "thresholds_x0": thresholds_x0,
    "thresholds_a": thresholds_a,
    "thresholds_b": thresholds_b
}

# PTG expects a single temperature feature, so only the two
# mean-temperature scenarios apply here.
for field in ["only_meanweather_inputs_avg", "only_meanweather_inputs_wavg"]:

    tags = {"model": "ptg", "inputs": field}

    train_ptgmodel(parameters, possible_inputs[field], tags, log=False)

# # Evaluate mlflow results

# In[18]:

# Select the run of the experiment
# NOTE(review): this queries experiment id "0" (MLflow's default
# experiment) while the runs above were logged under the named
# "electricityconsumption-forecast" experiment — confirm the intended
# experiment id on the tracking server.
df_runs = mlflow.search_runs(experiment_ids="0")
print("Number of runs done : ", len(df_runs))

# In[19]:

# Quick sorting to get the best models based on the RMSE metric
df_runs.sort_values(["metrics.rmse"], ascending=True, inplace=True)
df_runs.head()

# In[20]:

# Get the best one: the first row after the RMSE sort is the
# best-scoring run.
runid_selected = df_runs.head(1)["run_id"].values[0]
runid_selected

# In[ ]:


================================================
FILE: ch06/Metadata.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Installation and imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already up-to-date: kfmd in ./.local/lib/python3.6/site-packages (0.1.8)\n",
      "Requirement already up-to-date: pandas in ./.local/lib/python3.6/site-packages (1.0.1)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.1)\n",
      "Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.18.1)\n",
      "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2019.3)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.6.1->pandas) (1.11.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install kfmd --upgrade --user\n",
    "!pip install pandas --upgrade --user\n",
    "\n",
    "from kfmd import metadata\n",
    "import pandas\n",
    "from datetime import datetime\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a workspace, run and execution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ws1 = metadata.Workspace(\n",
    "    # Connect to metadata-service in namesapce kubeflow in k8s cluster.\n",
    "    backend_url_prefix=\"metadata-service.kubeflow.svc.cluster.local:8080\",\n",
    "    name=\"ws1\",\n",
    "    description=\"a workspace for testing\",\n",
    "    labels={\"n1\": \"v1\"})\n",
    "r = metadata.Run(\n",
    "    workspace=ws1,\n",
    "    name=\"run-\" + datetime.utcnow().isoformat(\"T\") ,\n",
    "    description=\"a run in ws_1\",\n",
    ")\n",
    "exec = metadata.Execution(\n",
    "    name = \"execution\" + datetime.utcnow().isoformat(\"T\") ,\n",
    "    workspace=ws1,\n",
    "    run=r,\n",
    "    description=\"execution example\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Log data set, model and its evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_set = exec.log_input(\n",
    "        metadata.DataSet(\n",
    "            description=\"an example data\",\n",
    "            name=\"mytable-dump\",\n",
    "            owner=\"owner@my-company.org\",\n",
    "            uri=\"file://path/to/dataset\",\n",
    "            version=\"v1.0.0\",\n",
    "            query=\"SELECT * FROM mytable\"))\n",
    "model = exec.log_output(\n",
    "    metadata.Model(\n",
    "            name=\"MNIST\",\n",
    "            description=\"model to recognize handwritten digits\",\n",
    "            owner=\"someone@kubeflow.org\",\n",
    "            uri=\"gcs://my-bucket/mnist\",\n",
    "            model_type=\"neural network\",\n",
    "            training_framework={\n",
    "                \"name\": \"tensorflow\",\n",
    "                \"version\": \"v1.0\"\n",
    "            },\n",
    "            hyperparameters={\n",
    "                \"learning_rate\": 0.5,\n",
    "                \"layers\": [10, 3, 1],\n",
    "                \"early_stop\": True\n",
    "            },\n",
    "            version=\"v0.0.1\",\n",
    "            labels={\"mylabel\": \"l1\"}))\n",
    "metrics = exec.log_output(\n",
    "    metadata.Metrics(\n",
    "            name=\"MNIST-evaluation\",\n",
    "            description=\"validating the MNIST model to recognize handwritten digits\",\n",
    "            owner=\"someone@kubeflow.org\",\n",
    "            uri=\"gcs://my-bucket/mnist-eval.csv\",\n",
    "            data_set_id=data_set.id,\n",
    "            model_id=model.id,\n",
    "            metrics_type=metadata.Metrics.VALIDATION,\n",
    "            values={\"accuracy\": 0.95},\n",
    "            labels={\"mylabel\": \"l1\"}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "List all the models in the workspace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>workspace</th>\n",
       "      <th>run</th>\n",
       "      <th>create_time</th>\n",
       "      <th>description</th>\n",
       "      <th>model_type</th>\n",
       "      <th>name</th>\n",
       "      <th>owner</th>\n",
       "      <th>version</th>\n",
       "      <th>uri</th>\n",
       "      <th>training_framework</th>\n",
       "      <th>hyperparameters</th>\n",
       "      <th>labels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>ws1</td>\n",
       "      <td>run-2020-02-18T00:48:10.734939</td>\n",
       "      <td>2020-02-18T00:48:13.273533Z</td>\n",
       "      <td>model to recognize handwritten digits</td>\n",
       "      <td>neural network</td>\n",
       "      <td>MNIST</td>\n",
       "      <td>someone@kubeflow.org</td>\n",
       "      <td>v0.0.1</td>\n",
       "      <td>gcs://my-bucket/mnist</td>\n",
       "      <td>{'name': 'tensorflow', 'version': 'v1.0'}</td>\n",
       "      <td>{'learning_rate': 0.5, 'layers': [10, 3, 1], '...</td>\n",
       "      <td>{'mylabel': 'l1'}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id workspace                             run                  create_time  \\\n",
       "0  8       ws1  run-2020-02-18T00:48:10.734939  2020-02-18T00:48:13.273533Z   \n",
       "\n",
       "                             description      model_type   name  \\\n",
       "0  model to recognize handwritten digits  neural network  MNIST   \n",
       "\n",
       "                  owner version                    uri  \\\n",
       "0  someone@kubeflow.org  v0.0.1  gcs://my-bucket/mnist   \n",
       "\n",
       "                          training_framework  \\\n",
       "0  {'name': 'tensorflow', 'version': 'v1.0'}   \n",
       "\n",
       "                                     hyperparameters             labels  \n",
       "0  {'learning_rate': 0.5, 'layers': [10, 3, 1], '...  {'mylabel': 'l1'}  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pandas.DataFrame.from_dict(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get basic lineage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "model id is 8\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(\"model id is %s\\n\" % model.id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find the execution that produces this model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "output_events = ws1.client.list_events2(model.id).events\n",
    "assert len(output_events) == 1\n",
    "execution_id = output_events[0].execution_id\n",
    "print(execution_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find all events related to that execution."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "All events related to this model:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>artifact_id</th>\n",
       "      <th>execution_id</th>\n",
       "      <th>path</th>\n",
       "      <th>type</th>\n",
       "      <th>milliseconds_since_epoch</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "      <td>INPUT</td>\n",
       "      <td>1581986893248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "      <td>OUTPUT</td>\n",
       "      <td>1581986893273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "      <td>OUTPUT</td>\n",
       "      <td>1581986893298</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  artifact_id execution_id  path    type milliseconds_since_epoch\n",
       "0           7            3  None   INPUT            1581986893248\n",
       "1           8            3  None  OUTPUT            1581986893273\n",
       "2           9            3  None  OUTPUT            1581986893298"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_events = ws1.client.list_events(execution_id).events\n",
    "assert len(all_events) == 3\n",
    "print(\"\\nAll events related to this model:\")\n",
    "pandas.DataFrame.from_dict([e.to_dict() for e in all_events])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: ch06/Metadata.py
================================================
#!/usr/bin/env python
# coding: utf-8

# Example script exported from ch06/Metadata.ipynb: records a data set, a
# model, and an evaluation in the Kubeflow metadata service, then walks the
# lineage back from the model to its producing execution.

# # Installation and imports

# In[1]:

get_ipython().system('pip install kfmd --upgrade --user')
get_ipython().system('pip install pandas --upgrade --user')

from kfmd import metadata
import pandas
from datetime import datetime

# Create a workspace, run and execution

# In[2]:

ws1 = metadata.Workspace(
    # Connect to metadata-service in namespace kubeflow in k8s cluster.
    backend_url_prefix="metadata-service.kubeflow.svc.cluster.local:8080",
    name="ws1",
    description="a workspace for testing",
    labels={"n1": "v1"})
r = metadata.Run(
    workspace=ws1,
    name="run-" + datetime.utcnow().isoformat("T"),
    description="a run in ws_1",
)
# Renamed from `exec` — that name shadows the Python builtin exec().
execution = metadata.Execution(
    name="execution" + datetime.utcnow().isoformat("T"),
    workspace=ws1,
    run=r,
    description="execution example",
)

# Log data set, model and its evaluation

# In[3]:

data_set = execution.log_input(
    metadata.DataSet(description="an example data",
                     name="mytable-dump",
                     owner="owner@my-company.org",
                     uri="file://path/to/dataset",
                     version="v1.0.0",
                     query="SELECT * FROM mytable"))
model = execution.log_output(
    metadata.Model(name="MNIST",
                   description="model to recognize handwritten digits",
                   owner="someone@kubeflow.org",
                   uri="gcs://my-bucket/mnist",
                   model_type="neural network",
                   training_framework={
                       "name": "tensorflow",
                       "version": "v1.0"
                   },
                   hyperparameters={
                       "learning_rate": 0.5,
                       "layers": [10, 3, 1],
                       "early_stop": True
                   },
                   version="v0.0.1",
                   labels={"mylabel": "l1"}))
metrics = execution.log_output(
    metadata.Metrics(
        name="MNIST-evaluation",
        description=
        "validating the MNIST model to recognize handwritten digits",
        owner="someone@kubeflow.org",
        uri="gcs://my-bucket/mnist-eval.csv",
        data_set_id=data_set.id,
        model_id=model.id,
        metrics_type=metadata.Metrics.VALIDATION,
        values={"accuracy": 0.95},
        labels={"mylabel": "l1"}))

# List all the models in the workspace

# In[4]:

pandas.DataFrame.from_dict(ws1.list(metadata.Model.ARTIFACT_TYPE_NAME))

# Get basic lineage

# In[5]:

print("model id is %s\n" % model.id)

# Find the execution that produces this model.

# In[6]:

# list_events2 returns the events whose artifact is this model; exactly one
# OUTPUT event is expected since the model was logged once above.
output_events = ws1.client.list_events2(model.id).events
assert len(output_events) == 1
execution_id = output_events[0].execution_id
print(execution_id)

# Find all events related to that execution.

# In[7]:

# One INPUT (data set) and two OUTPUT (model, metrics) events.
all_events = ws1.client.list_events(execution_id).events
assert len(all_events) == 3
print("\nAll events related to this model:")
pandas.DataFrame.from_dict([e.to_dict() for e in all_events])

# In[ ]:

================================================
FILE: ch06/docker/Dockerfile
================================================
# from https://github.com/flmu/mlflow-tracking-server
# Image for an MLflow tracking server; run.sh (the entrypoint) validates the
# AWS_* variables and starts `mlflow server`.

FROM python:3.7

# mlflow provides the tracking UI/API; awscli and boto3 give it access to an
# S3-compatible artifact store (AWS S3 or MinIO).
RUN pip3 install --upgrade pip && \
    pip3 install mlflow --upgrade && \
    pip3 install awscli --upgrade  && \
    pip3 install boto3 --upgrade

# Defaults consumed by run.sh; the AWS_* placeholders are expected to be
# overridden at deploy time (e.g. from a Kubernetes secret).
ENV PORT 5000
ENV AWS_BUCKET bucket
ENV AWS_ACCESS_KEY_ID aws_id
ENV AWS_SECRET_ACCESS_KEY aws_key

# Local directory used as the backend store for run metadata.
ENV FILE_DIR /tmp/mlflow

RUN mkdir -p /opt/mlflow

COPY run.sh /opt/mlflow
RUN chmod -R 777 /opt/mlflow/

ENTRYPOINT ["/opt/mlflow/run.sh"]

================================================
FILE: ch06/docker/build.sh
================================================
#!/bin/bash
# Build the MLflow tracking-server image from the Dockerfile in this
# directory. Fails fast on the first error, and quotes the image reference
# so an accidentally-empty variable cannot silently produce a bad tag.
set -e

img='lightbend/mlflow'
tag='0.1'
docker build -t "${img}:${tag}" .


================================================
FILE: ch06/docker/run.sh
================================================
#!/bin/sh
# Start the MLflow tracking server: run metadata goes to a local file store,
# artifacts go to an S3-compatible bucket (AWS S3 or MinIO).

set -e

# Default PORT/FILE_DIR to the values baked into the Dockerfile so the
# script also works when invoked outside the container image.
PORT="${PORT:-5000}"
FILE_DIR="${FILE_DIR:-/tmp/mlflow}"

if [ -z "${AWS_BUCKET}" ]; then
  echo >&2 "AWS_BUCKET must be set"
  exit 1
fi

if [ -z "${AWS_ACCESS_KEY_ID}" ]; then
  echo >&2 "AWS_ACCESS_KEY_ID must be set"
  exit 1
fi

if [ -z "${AWS_SECRET_ACCESS_KEY}" ]; then
  echo >&2 "AWS_SECRET_ACCESS_KEY must be set"
  exit 1
fi

mkdir -p "${FILE_DIR}"

# Bind to all interfaces so the container port can be exposed.
mlflow server \
    --backend-store-uri "file://$FILE_DIR" \
    --default-artifact-root "s3://$AWS_BUCKET/mlflow/artifacts" \
    --host 0.0.0.0 \
    --port "$PORT"


================================================
FILE: ch06/install/mlflowchart/.helmignore
================================================
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj


================================================
FILE: ch06/install/mlflowchart/Chart.yaml
================================================
apiVersion: v1
appVersion: "0.1"
description: MLFlow
maintainers:
- name: Boris Lublinsky
# Chart names must be lower-case alphanumerics and dashes; the previous
# value ("MLFLOW tracking server") contained spaces, which helm lint /
# helm package reject.
name: mlflow-tracking-server
# NOTE(review): Helm expects `version` to be SemVer (e.g. 0.1.0) — confirm
# before packaging.
version: 0.1

================================================
FILE: ch06/install/mlflowchart/templates/NOTES.txt
================================================
The MLflow tracking server has been installed.


================================================
FILE: ch06/install/mlflowchart/templates/_helpers.tpl
================================================
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{/* NOTE(review): the "modelserverchart" prefix does not match this chart's
directory name ("mlflowchart") — likely copied from another chart. Harmless
while these helpers are unused (templates/mlflow.yaml does not reference
them), but confirm before relying on them. */}}
{{- define "modelserverchart.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
*/}}
{{- define "modelserverchart.fullname" -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}


================================================
FILE: ch06/install/mlflowchart/templates/mlflow.yaml
================================================
# MLflow tracking server: Deployment + Service + Istio VirtualService that
# exposes the UI/API behind the Kubeflow gateway under the /mlflow/ prefix.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: kubeflow
  name: mlflowserver
  labels:
    app: mlflowserver
spec:
  replicas: 1
  selector:
    matchLabels:
      app: mlflowserver
  strategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: mlflowserver
    spec:
      containers:
        - name: server
          image: "{{ .Values.image.server }}:{{ .Values.image.version }}"
          imagePullPolicy: "{{ .Values.image.pullPolicy }}"
          ports:
            - containerPort: 5000
              name: serving
              protocol: TCP
          env:
            # Point the S3 client at the in-cluster MinIO endpoint.
            - name: "MLFLOW_S3_ENDPOINT_URL"
              value: "http://minio-service.kubeflow.svc.cluster.local:9000"
            - name: "AWS_ACCESS_KEY_ID"
              valueFrom: { secretKeyRef: { name: "minioaccess", key: "AWS_ACCESS_KEY_ID" } }
            - name: "AWS_SECRET_ACCESS_KEY"
              valueFrom: { secretKeyRef: { name: "minioaccess", key: "AWS_SECRET_ACCESS_KEY" } }
            - name: "AWS_BUCKET"
              value: "mlflow"
      # Fixed: `volumes` is a pod-spec field; it was previously nested under
      # the container entry, where the API server rejects it as an unknown
      # field on Container. NOTE(review): nothing mounts this volume — the
      # credentials are already injected via env secretKeyRef above — confirm
      # whether it is still needed.
      volumes:
        - name: secret-volume
          secret:
            secretName: minioaccess
---
apiVersion: v1
kind: Service
metadata:
  namespace: kubeflow
  name: mlflowserver
spec:
  selector:
    app: mlflowserver
  ports:
  - protocol: TCP
    port: 5000
    targetPort: 5000
---
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: mlflow-server
  namespace: kubeflow
spec:
  gateways:
    - kubeflow-gateway
  hosts:
    - '*'
  http:
    - match:
        - uri:
            prefix: /mlflow/
      # Strip the /mlflow/ prefix before forwarding to the server.
      rewrite:
        uri: /
      route:
        - destination:
            host: mlflowserver.kubeflow.svc.cluster.local
            port:
              number: 5000

================================================
FILE: ch06/install/mlflowchart/values.yaml
================================================
# Docker image configuration for the MLflow tracking-server deployment
# (consumed by templates/mlflow.yaml).
image:
  server: lightbend/mlflow
  pullPolicy: Always
  version: 0.1


================================================
FILE: ch10/experiment.yaml
================================================
Name:         random-example
Namespace:    kubeflow
Labels:       controller-tools.k8s.io=1.0
Annotations:  <none>
API Version:  kubeflow.org/v1alpha3
Kind:         Experiment
Metadata:
  Creation Timestamp:  2019-12-22T22:53:25Z
  Finalizers:
    update-prometheus-metrics
  Generation:        2
  Resource Version:  720692
  Self Link:         /apis/kubeflow.org/v1alpha3/namespaces/kubeflow/experiments/random-example
  UID:               dc6bc15a-250d-11ea-8cae-42010a80010f
Spec:
  Algorithm:
    Algorithm Name:        random
    Algorithm Settings:    <nil>
  Max Failed Trial Count:  3
  Max Trial Count:         12
  Metrics Collector Spec:
    Collector:
      Kind:  StdOut
  Objective:
    Additional Metric Names:
      accuracy
    Goal:                   0.99
    Objective Metric Name:  Validation-accuracy
    Type:                   maximize
  Parallel Trial Count:     3
  Parameters:
    Feasible Space:
      Max:           0.03
      Min:           0.01
    Name:            --lr
    Parameter Type:  double
    Feasible Space:
      Max:           5
      Min:           2
    Name:            --num-layers
    Parameter Type:  int
    Feasible Space:
      List:
        sgd
        adam
        ftrl
    Name:            --optimizer
    Parameter Type:  categorical
  Trial Template:
    Go Template:
      Raw Template:  apiVersion: batch/v1
kind: Job
metadata:
  name: {{.Trial}}
  namespace: {{.NameSpace}}
spec:
  template:
    spec:
      containers:
      - name: {{.Trial}}
        image: docker.io/kubeflowkatib/mxnet-mnist-example
        command:
        - "python"
        - "/mxnet/example/image-classification/train_mnist.py"
        - "--batch-size=64"
        {{- with .HyperParameters}}
        {{- range .}}
        - "{{.Name}}={{.Value}}"
        {{- end}}
        {{- end}}
      restartPolicy: Never
Status:
  Conditions:
    Last Transition Time:  2019-12-22T22:53:25Z
    Last Update Time:      2019-12-22T22:53:25Z
    Message:               Experiment is created
    Reason:                ExperimentCreated
    Status:                True
    Type:                  Created
    Last Transition Time:  2019-12-22T22:55:10Z
    Last Update Time:      2019-12-22T22:55:10Z
    Message:               Experiment is running
    Reason:                ExperimentRunning
    Status:                True
    Type:                  Running
  Current Optimal Trial:
    Observation:
      Metrics:
        Name:   Validation-accuracy
        Value:  0.981091
    Parameter Assignments:
      Name:          --lr
      Value:         0.025139701133432946
      Name:          --num-layers
      Value:         4
      Name:          --optimizer
      Value:         sgd
  Start Time:        2019-12-22T22:53:25Z
  Trials:            12
  Trials Running:    2
  Trials Succeeded:  10
Events:              <none>


================================================
FILE: ch10/hptuning.py
================================================
# Pseudocode for the generic hyperparameter-tuning loop that Katib
# automates: keep requesting trial configurations until the objective is
# reached or the trial budget is exhausted.
# Initialize search space
# Initialize model
while not objective_reached and not budget_exhausted:  # fixed typo: was `bugdget_exhausted`
    # Obtain new hyperparameters
    suggestion = GetSuggestions()

    # Run trial with new hyperparameters; collect metrics
    metrics = RunTrial(suggestion)

    # Report metrics
    Report(metrics)


================================================
FILE: ch10/random.yaml
================================================
apiVersion: "kubeflow.org/v1alpha3"
kind: Experiment
metadata:
  namespace: kubeflow
  labels:
    controller-tools.k8s.io: "1.0"
  name: random-example
spec:
  objective:
    type: maximize
    goal: 0.99
    objectiveMetricName: Validation-accuracy
    additionalMetricNames:
      - Train-accuracy
  algorithm:
    algorithmName: random
  parallelTrialCount: 3
  maxTrialCount: 12
  maxFailedTrialCount: 3
  parameters:
    - name: --lr
      parameterType: double
      feasibleSpace:
        min: "0.01"
        max: "0.03"
    - name: --num-layers
      parameterType: int
      feasibleSpace:
        min: "2"
        max: "5"
    - name: --optimizer
      parameterType: categorical
      feasibleSpace:
        list:
        - sgd
        - adam
        - ftrl
  trialTemplate:
    goTemplate:
        rawTemplate: |-
          apiVersion: batch/v1
          kind: Job
          metadata:
            name: {{.Trial}}
            namespace: {{.NameSpace}}
          spec:
            template:
              spec:
                containers:
                - name: {{.Trial}}
                  image: docker.io/kubeflowkatib/mxnet-mnist
                  command:
                  - "python3"
                  - "/opt/mxnet-mnist/mnist.py"
                  - "--batch-size=64"
                  {{- with .HyperParameters}}
                  {{- range .}}
                  - "{{.Name}}={{.Value}}"
                  {{- end}}
                  {{- end}}
                restartPolicy: Never


================================================
FILE: ch2/Dockerfile
================================================
# Minimal custom notebook image: start from the public Kubeflow
# TensorFlow 2.1 CPU notebook base; extend with RUN/COPY as needed.
FROM gcr.io/kubeflow-images-public/tensorflow-2.1.0-notebook-cpu:1.0.0

================================================
FILE: ch2/build-and-push.sh
================================================
#!/bin/bash
# Builds the ch2 notebook image and pushes it to ${CONTAINER_REGISTRY}.
# Requires CONTAINER_REGISTRY to be exported and docker login to the
# registry to have been done. (tag::/end:: markers are book include anchors.)
#tag::buildandpush[]
IMAGE="${CONTAINER_REGISTRY}/kubeflow/test:v1"
docker build  -t "${IMAGE}" -f Dockerfile .
docker push "${IMAGE}"
#end::buildandpush[]


================================================
FILE: ch2/query-endpoint.py
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
#     specific language governing permissions and limitations
# under the License.

#tag::scriptSetup[]
import requests
import numpy as np

# NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x; this
# script assumes a TF 1.x environment — confirm before running.
from tensorflow.examples.tutorials.mnist import input_data
from matplotlib import pyplot as plt

def download_mnist():
    # Download (or reuse cached) MNIST under ./MNIST_data, one-hot labels.
    return input_data.read_data_sets("MNIST_data/", one_hot=True)


def gen_image(arr):
    # Reshape a flat 784-vector into a 28x28 grayscale image, render it with
    # matplotlib, and return the pyplot module so the caller can .show().
    two_d = (np.reshape(arr, (28, 28)) * 255).astype(np.uint8)
    plt.imshow(two_d, cmap=plt.cm.gray_r, interpolation='nearest')
    return plt
#end::scriptSetup[]

# Address of the Ambassador gateway fronting the Seldon deployment; replace
# with your cluster's value.
AMBASSADOR_API_IP = "10.53.148.167:30134"

#tag::scriptGuts[]
mnist = download_mnist()
# Take one image from the training set, display it, and send it for scoring.
batch_xs, batch_ys = mnist.train.next_batch(1)
chosen = 0
gen_image(batch_xs[chosen]).show()
data = batch_xs[chosen].reshape((1, 784))
features = ["X" + str(i + 1) for i in range(0, 784)]
request = {"data": {"names": features, "ndarray": data.tolist()}}
deploymentName = "mnist-classifier"
uri = "http://" + AMBASSADOR_API_IP + "/seldon/" + \
    deploymentName + "/api/v0.1/predictions"

# Time out instead of hanging forever if the endpoint is unreachable.
response = requests.post(uri, json=request, timeout=30)
#end::scriptGuts[]
print(response.status_code)


================================================
FILE: ch2_seldon_examples/pipeline_role.yaml
================================================
# Grants full control over Seldon deployments in the kubeflow namespace so
# pipeline steps running as the pipeline-runner service account can create
# and update model-serving resources.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: kubeflow
  name: pipeline-runner
rules:
- apiGroups: ["machinelearning.seldon.io"]
  resources: ["seldondeployments"]
  verbs: ["*"]


================================================
FILE: ch2_seldon_examples/pipeline_rolebinding.yaml
================================================
# Binds the pipeline-runner Role (see pipeline_role.yaml) to the
# pipeline-runner service account in the kubeflow namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: pipeline-runner
  namespace: kubeflow
subjects:
- kind: ServiceAccount
  name: pipeline-runner
  namespace: kubeflow
roleRef:
  kind: Role
  name: pipeline-runner
  apiGroup: rbac.authorization.k8s.io


================================================
FILE: ch2_seldon_examples/pv-claim.yaml
================================================
# Claims 3Gi of the manually-provisioned storage class; intended to bind to
# a PersistentVolume such as the one in pv-volume.yaml.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: "nfs-1"
spec:
  storageClassName: manual
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 3Gi


================================================
FILE: ch2_seldon_examples/pv-volume.yaml
================================================
# Manually-provisioned 10Gi volume backed by a host path; pairs with the
# "manual" storage-class claim in pv-claim.yaml. hostPath volumes are only
# suitable for single-node/dev clusters.
kind: PersistentVolume
apiVersion: v1
metadata:
  name: task-pv-volume
  labels:
    type: local
spec:
  storageClassName: manual
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: "/mnt/data"

================================================
FILE: ch2_seldon_examples/request_example.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting matplotlib\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/57/4f/dd381ecf6c6ab9bcdaa8ea912e866dedc6e696756156d8ecc087e20817e2/matplotlib-3.1.1-cp36-cp36m-manylinux1_x86_64.whl (13.1MB)\n",
      "\u001b[K    100% |████████████████████████████████| 13.1MB 2.7MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.6/site-packages (from matplotlib) (2.8.0)\n",
      "Collecting cycler>=0.10 (from matplotlib)\n",
      "  Downloading https://files.pythonhosted.org/packages/f7/d2/e07d3ebb2bd7af696440ce7e754c59dd546ffe1bbe732c8ab68b9c834e61/cycler-0.10.0-py2.py3-none-any.whl\n",
      "Collecting kiwisolver>=1.0.1 (from matplotlib)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/f8/a1/5742b56282449b1c0968197f63eae486eca2c35dcd334bab75ad524e0de1/kiwisolver-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (90kB)\n",
      "\u001b[K    100% |████████████████████████████████| 92kB 32.5MB/s ta 0:00:01\n",
      "\u001b[?25hCollecting pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 (from matplotlib)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/11/fa/0160cd525c62d7abd076a070ff02b2b94de589f1a9789774f17d7c54058e/pyparsing-2.4.2-py2.py3-none-any.whl (65kB)\n",
      "\u001b[K    100% |████████████████████████████████| 71kB 25.6MB/s ta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: numpy>=1.11 in /opt/conda/lib/python3.6/site-packages (from matplotlib) (1.16.2)\n",
      "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.6/site-packages (from python-dateutil>=2.1->matplotlib) (1.12.0)\n",
      "Requirement already satisfied: setuptools in /opt/conda/lib/python3.6/site-packages (from kiwisolver>=1.0.1->matplotlib) (40.9.0)\n",
      "Installing collected packages: cycler, kiwisolver, pyparsing, matplotlib\n",
      "Successfully installed cycler-0.10.0 kiwisolver-1.1.0 matplotlib-3.1.1 pyparsing-2.4.2\n",
      "\u001b[33mYou are using pip version 19.0.1, however version 19.2.3 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!pip install matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import numpy as np\n",
    "\n",
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "\n",
    "def download_mnist():\n",
    "    return input_data.read_data_sets(\"MNIST_data/\", one_hot = True)\n",
    "\n",
    "def gen_image(arr):\n",
    "    two_d = (np.reshape(arr, (28, 28)) * 255).astype(np.uint8)\n",
    "    plt.imshow(two_d,cmap=plt.cm.gray_r, interpolation='nearest')\n",
    "    return plt\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From <ipython-input-3-0613226129c0>:9: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n",
      "WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please write your own downloading logic.\n",
      "WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:252: _internal_retry.<locals>.wrap.<locals>.wrapped_fn (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use urllib or similar directly.\n",
      "Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\n",
      "WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use tf.data to implement this functionality.\n",
      "Extracting MNIST_data/train-images-idx3-ubyte.gz\n",
      "Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\n",
      "WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use tf.data to implement this functionality.\n",
      "Extracting MNIST_data/train-labels-idx1-ubyte.gz\n",
      "WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use tf.one_hot on tensors.\n",
      "Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\n",
      "Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n",
      "Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\n",
      "Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n",
      "WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n"
     ]
    }
   ],
   "source": [
    "mnist = download_mnist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAANMklEQVR4nO3dXaxV9ZnH8d9Ppr0REmE4ORDAgakYo2Ok5EhMahonZIgvIdgYTblATMxQXyCtaeIYJ1ovvMAJ0BQzklAlpaRDbWwVYohTB5uY3hCPBoUjaX0JBghyDhqiqFiVZy7Osjni2Wsf9lr7RZ7vJznZe69nrb2erPBj7b3+e++/I0IAzn3ndbsBAJ1B2IEkCDuQBGEHkiDsQBL/0MmdTZ8+PebOndvJXQKpHDx4UMePH/d4tUpht32tpF9ImiTp8YhYW7b+3LlzNTg4WGWXAEoMDAw0rLX8Mt72JEn/Lek6SZdKWm770lafD0B7VXnPvkjSmxHxdkT8TdJvJS2rpy0AdasS9lmSDo15fLhY9hW2V9ketD04MjJSYXcAqmj71fiI2BwRAxEx0NfX1+7dAWigStiPSJoz5vHsYhmAHlQl7C9Jmm97nu1vS/qhpJ31tAWgbi0PvUXE57ZXS/pfjQ69bYmIodo6A1CrSuPsEbFL0q6aegHQRnxcFkiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5BEpSmbbR+U9KGkLyR9HhEDdTQFoH6Vwl7414g4XsPzAGgjXsYDSVQNe0j6o+2Xba8abwXbq2wP2h4cGRmpuDsAraoa9qsjYqGk6yTdbfv7Z64QEZsjYiAiBvr6+iruDkCrKoU9Io4Ut8OSnpa0qI6mANSv5bDbPt/2lC/vS1oiaX9djQGoV5Wr8f2Snrb95fP8T0Q8V0tXAGrXctgj4m1JV9TYC4A2YugNSIKwA0kQdiAJwg4kQdiBJOr4Igy+wbZt21Za/+STTzrUydl74IEHSuvr1q1rWFuxYkXd7fQ8zuxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kATj7OeANWvWNKzt2bOndNu9e/eW1j/77LPSen9/f8vbnz59unTbEydOlNabKb5+jQJndiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2Djh27Fhp/eGHH670/Dt27GhYO3ToUKXnvuOOO0rrt912W2n9o48+aljbtGlT6bZPPfVUaX3+/Pml9SuvvLK0ng1ndiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2Grz11lul9WXLlpXWh4aGKu1/8uTJDWu33npr6bbr168vrU+bNq20ft555eeLxx9/vGFtcHCwdNvLLrustP7cc+UzhM+ePbu0nk3TM7vtLbaHbe8fs2ya7edtv1HcTm1vmwCqmsjL+F9JuvaMZfdJ2h0R8yXtLh4D6GFNwx4RL0p6/4zFyyRtLe5vlXRjzX0BqFmrF+j6I+Jocf9dSQ1/iMz2KtuDtgdHRkZa3B2AqipfjY+IkBQl9c0RMRARA319fVV3B6BFrYb9mO2ZklTcDtfXEoB2aDXsOyWtLO6vlNT4O5YAekLTcXbb2yVdI2m67cOSfiZpraTf2b5d0juSbmlnk72ubJxbkmbNmlVarzrO/sgjjzSs3XXXXZWeu5n33nuvtL5hw4aGtZMnT5Zue/PNN5fWGUc/O03DHhHLG5QW19wLgDbi47JAEoQdSIKwA0kQdiAJwg4kwVdca9Bs2uKNGzeW1i+55JJK+2/2k8rt9Nhjj5XWDxw40LDW7Ou3N9xwQ0s9YXyc2YEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcbZO+Ciiy4qra9Zs6a0/uijj9bZ
zlk5depUab3Z13NnzJjRsHbnnXeWbnvVVVeV1nF2OLMDSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKMs3fApEmTSuurV68urS9durS0vnDhwrPuaaJOnDhRWn/yySdL60uWLGlYYxy9szizA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EASjLP3gIsvvrhSvZ2eeeaZru0b9Wp6Zre9xfaw7f1jlj1k+4jtvcXf9e1tE0BVE3kZ/ytJ146z/OcRsaD421VvWwDq1jTsEfGipPc70AuANqpygW617deKl/lTG61ke5XtQduDIyMjFXYHoIpWw75J0nckLZB0VNL6RitGxOaIGIiIgb6+vhZ3B6CqlsIeEcci4ouIOC3pl5IW1dsWgLq1FHbbM8c8/IGk/Y3WBdAbmo6z294u6RpJ020flvQzSdfYXiApJB2U9KM29ogu2rWrfKDlnnvuKa0/+OCDdbaDCpqGPSKWj7P4iTb0AqCN+LgskARhB5Ig7EAShB1IgrADSfAV1+SGh4dL659++mlpffLkyaX1Cy644Kx7QntwZgeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJBhnT+7yyy8vrX/88cel9XvvvbfOdtBGnNmBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnG2VGq2ffVFy9e3KFOUBVndiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2c9z27dtL6x988EFpfcaMGXW2gy5qema3Pcf2n2y/bnvI9o+L5dNsP2/7jeJ2avvbBdCqibyM/1zSTyPiUklXSbrb9qWS7pO0OyLmS9pdPAbQo5qGPSKORsQrxf0PJR2QNEvSMklbi9W2SrqxXU0CqO6sLtDZnivpu5L2SOqPiKNF6V1J/Q22WWV70PbgyMhIhVYBVDHhsNueLOn3kn4SEV+5qhMRISnG2y4iNkfEQEQM9PX1VWoWQOsmFHbb39Jo0H8TEX8oFh+zPbOoz5RUPh0ogK5qOvRm25KekHQgIjaMKe2UtFLS2uJ2R1s6RCVDQ0Ol9VOnTpXWN27cWGc76KKJjLN/T9IKSfts7y2W3a/RkP/O9u2S3pF0S3taBFCHpmGPiD9LcoMyv1wAfEPwcVkgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Lgp6TPAWvXrm1Y27lzZ+m2CxYsKK1fccUVLfWE3sOZHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSYJz9HPDCCy80rO3bt6/Sc7/66qul9QsvvLDS86NzOLMDSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBITmZ99jqRfS+qXFJI2R8QvbD8k6d8ljRSr3h8Ru9rVKBqbN29e255727ZtpfWlS5e2bd+o10Q+VPO5pJ9GxCu2p0h62fbzRe3nEbGufe0BqMtE5mc/Kulocf9D2wckzWp3YwDqdVbv2W3PlfRdSXuKRattv2Z7i+2pDbZZZXvQ9uDIyMh4qwDogAmH3fZkSb+X9JOI+EDSJknfkbRAo2f+9eNtFxGbI2IgIgb6+vpqaBlAKyYUdtvf0mjQfxMRf5CkiDgWEV9ExGlJv5S0qH1tAqiqadhtW9ITkg5ExIYxy2eOWe0HkvbX3x6Aukzkavz3JK2QtM/23mLZ/ZKW216g0eG4g5J+1JYO0dS6dY0HRI4cOVK67eLFi0vrN910U0s9ofdM5Gr8nyV5nBJj6sA3CJ+gA5Ig7EAShB1IgrADSRB2IAnCDiTBT0mfA6ZMmdKw9uyzz3awE/QyzuxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kIQjonM7s0ckvTNm0XRJxzvWwNnp1d56tS+J3lpVZ2//FBHj/v5bR8P+tZ3bgxEx0LUGSvRqb73al0RvrepUb7yMB5Ig7EAS3Q775i7vv0yv9tarfUn01qqO9NbV9+wAOqfbZ3YAHULYgSS6Enbb19r+i+03bd/XjR4asX3Q9j7be20PdrmXLbaH
be8fs2ya7edtv1HcjjvHXpd6e8j2keLY7bV9fZd6m2P7T7Zftz1k+8fF8q4eu5K+OnLcOv6e3fYkSX+V9G+SDkt6SdLyiHi9o400YPugpIGI6PoHMGx/X9JJSb+OiH8plv2XpPcjYm3xH+XUiPiPHuntIUknuz2NdzFb0cyx04xLulHSberisSvp6xZ14Lh148y+SNKbEfF2RPxN0m8lLetCHz0vIl6U9P4Zi5dJ2lrc36rRfywd16C3nhARRyPileL+h5K+nGa8q8eupK+O6EbYZ0k6NObxYfXWfO8h6Y+2X7a9qtvNjKM/Io4W99+V1N/NZsbRdBrvTjpjmvGeOXatTH9eFRfovu7qiFgo6TpJdxcvV3tSjL4H66Wx0wlN490p40wz/nfdPHatTn9eVTfCfkTSnDGPZxfLekJEHCluhyU9rd6bivrYlzPoFrfDXe7n73ppGu/xphlXDxy7bk5/3o2wvyRpvu15tr8t6YeSdnahj6+xfX5x4US2z5e0RL03FfVOSSuL+ysl7ehiL1/RK9N4N5pmXF0+dl2f/jwiOv4n6XqNXpF/S9J/dqOHBn39s6RXi7+hbvcmabtGX9Z9ptFrG7dL+kdJuyW9Ien/JE3rod62Sdon6TWNBmtml3q7WqMv0V+TtLf4u77bx66kr44cNz4uCyTBBTogCcIOJEHYgSQIO5AEYQeSIOxAEoQdSOL/AQe88PwDu2A0AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "401\n"
     ]
    }
   ],
   "source": [
    "batch_xs, batch_ys = mnist.train.next_batch(1)\n",
    "chosen=0\n",
    "gen_image(batch_xs[chosen]).show()\n",
    "data = batch_xs[chosen].reshape((1,784))\n",
    "features = [\"X\"+str(i+1) for i in range (0,784)]\n",
    "request = {\"data\":{\"names\":features,\"ndarray\":data.tolist()}}\n",
    "deploymentName = \"mnist-classifier\"\n",
    "uri = \"http://istio-ingressgateway.istio-system.svc.cluster.local/seldon/\"+deploymentName+\"/api/v0.1/predictions\"\n",
    "\n",
    "response = requests.post(\n",
    "    uri,\n",
    "    json=request)\n",
    "\n",
    "print(response.status_code)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Origin authentication failed.\n"
     ]
    }
   ],
   "source": [
    "print(response.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: ch2_seldon_examples/run_example.sh
================================================
#!/bin/bash
# Compile the chapter-2 training pipeline and show how to reach the
# Kubeflow/Istio web UI on the three supported platforms.
#tag::buildPipeline[]
dsl-compile --py train_pipeline.py --output job.yaml
#end::buildPipeline[]
#tag::connectToWebUI[]
# If you're on minikube and not using a loadbalancer:
minikube service --url -n istio-system istio-ingressgateway
# If you're on GCP: https://<kf_app_name>.endpoints.<gcp_project_name>.cloud.goog/
# If you're on vanilla K8s:
# External IP assigned to the istio ingress gateway by the load balancer.
INGRESS_HOST=$(kubectl -n istio-system get service istio-ingressgateway \
		       -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
export INGRESS_HOST
# Port of the plain-HTTP listener (the service port named "http2").
INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway \
	       -o jsonpath='{.spec.ports[?(@.name=="http2")].port}')
export INGRESS_PORT
# Port of the TLS listener (the service port named "https").
SECURE_INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway \
		      -o jsonpath='{.spec.ports[?(@.name=="https")].port}')
export SECURE_INGRESS_PORT

kubectl get svc istio-ingressgateway -n istio-system
#end::connectToWebUI[]


================================================
FILE: ch2_seldon_examples/setup_example.sh
================================================
#!/bin/bash
# Install Kubeflow (generic k8s + istio, or GCP with IAP) and clone the
# seldon example this chapter builds on.

set -ex

echo "Setting up example"

unset ch2_example_path
# Absolute path of the directory containing this script.
ch2_example_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "Using path ${ch2_example_path} for our example path"
example_path=$(dirname "${ch2_example_path}")
#tag::generate_kf_app_p1[]
# Pick the correct config file for your platform from
# https://github.com/kubeflow/manifests/tree/[version]/kfdef
# You can download & edit the configuration at this point if you need to.
# For generic k8s with istio:
MANIFEST_BRANCH=${MANIFEST_BRANCH:-v1.0-branch}
export MANIFEST_BRANCH
MANIFEST_VERSION=${MANIFEST_VERSION:-v1.0.1}
export MANIFEST_VERSION

# NOTE(review): PLATFORM may be unset at this point, which yields the
# project name "hello-kf-" — confirm callers export PLATFORM first.
KF_PROJECT_NAME=${KF_PROJECT_NAME:-hello-kf-${PLATFORM}}
export KF_PROJECT_NAME
mkdir "${KF_PROJECT_NAME}"
pushd "${KF_PROJECT_NAME}"

manifest_root=https://raw.githubusercontent.com/kubeflow/manifests/
# On most environments this will create a "vanilla" kubeflow install using istio.
KFDEF=${manifest_root}${MANIFEST_BRANCH}/kfdef/kfctl_k8s_istio.${MANIFEST_VERSION}.yaml
#end::generate_kf_app_p1[]
# On GCP this will create a cluster with basic authentication
if [ "$PLATFORM" == "gcp" ]; then
  KFDEF=${manifest_root}${MANIFEST_BRANCH}/kfdef/kfctl_gcp_iap.${MANIFEST_VERSION}.yaml
  # Temp hack: use the locally patched GCP kfdef checked into the repo.
  cp "${example_path}/kfctl_gcp_iap.v1.0.1.yaml" ./
  KFDEF=./kfctl_gcp_iap.v1.0.1.yaml
  # Set up IAP
  # TODO(holden)
  # Set up environment variables for GCP
  export PROJECT=${PROJECT:-"<your GCP project name>"}
  gcloud config set project "${PROJECT}"
  export ZONE=${ZONE:-"<your GCP zone>"}
  gcloud config set compute/zone "${ZONE}"
fi
pwd
#tag::generate_kf_app_p2[]
# Quote KFDEF so a path containing spaces or glob characters is passed intact.
kfctl apply -f "${KFDEF}" -V
echo $?

popd
#end::generate_kf_app_p2[]


# TODO(trevor): what version/tag?
#tag::cloneSeldonExample[]
# Clone the base seldon example
git clone https://github.com/kubeflow/example-seldon
#end::cloneSeldonExample[]


================================================
FILE: ch2_seldon_examples/tf_mnist_no_seldon_pipeline.py
================================================
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Kubeflow Pipelines MNIST example

Run this script to compile pipeline
"""

import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.onprem as onprem

# Storage backend selector for the pipeline below: 'PVC' provisions a
# Kubernetes volume mounted at /mnt on every step; 'GCS' skips the volume
# and attaches the 'user-gcp-sa' secret so steps can use a GCS bucket.
gcs_or_pvc = 'PVC'


@dsl.pipeline(name='MNIST',
              description='A pipeline to train and serve the MNIST example.')
def mnist_pipeline(gcs_bucket=None,
                   train_steps='200',
                   learning_rate='0.01',
                   batch_size='100'):
    """
    Pipeline with three stages:
      1. train an MNIST classifier
      2. deploy a tf-serving instance to the cluster
      3. deploy a web-ui to interact with it
    """

    # With the module-level gcs_or_pvc flag set to PVC, provision a 10G
    # ReadWriteOnce volume that all three steps later mount at /mnt.
    volume_op = None
    pipeline_volume = None
    if gcs_or_pvc == "PVC":
        volume_op = dsl.VolumeOp(name="create_pvc",
                                 resource_name="nfs-1",
                                 modes=dsl.VOLUME_MODE_RWO,
                                 size="10G")
        pipeline_volume = volume_op.volume

    # Stage 1: train, exporting the model to the GCS bucket or to /mnt.
    train_op = dsl.ContainerOp(
        name='train',
        image=
        'gcr.io/kubeflow-examples/mnist/model:v20190304-v0.2-176-g15d997b',
        arguments=[
            "/opt/model.py", "--tf-export-dir", gcs_bucket or "/mnt",
            "--tf-train-steps", train_steps, "--tf-batch-size", batch_size,
            "--tf-learning-rate", learning_rate
        ])

    # Stage 2: deploy a tf-serving instance pointed at the exported model.
    serving_arguments = [
        '--model-export-path', gcs_bucket or "/mnt", '--server-name',
        "mnist-service"
    ]
    if gcs_or_pvc != 'GCS':
        serving_arguments += [
            '--cluster-name', "mnist-pipeline", '--pvc-name', pipeline_volume
        ]

    serve_op = dsl.ContainerOp(
        name='serve',
        image='gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:'
        '7775692adf28d6f79098e76e839986c9ee55dd61',
        arguments=serving_arguments)
    serve_op.after(train_op)

    # Stage 3: deploy the demo web UI behind a LoadBalancer service.
    ui_arguments = [
        '--image', 'gcr.io/kubeflow-examples/mnist/web-ui:'
        'v20190304-v0.2-176-g15d997b-pipelines', '--name', 'web-ui',
        '--container-port', '5000', '--service-port', '80', '--service-type',
        "LoadBalancer"
    ]

    web_ui_op = dsl.ContainerOp(
        name='web-ui',
        image='gcr.io/kubeflow-examples/mnist/deploy-service:latest',
        arguments=ui_arguments)
    web_ui_op.after(serve_op)

    # Attach the GCP credential secret (GCS mode) or the shared volume
    # (PVC mode) to every step.
    for step in (train_op, serve_op, web_ui_op):
        if gcs_or_pvc == 'GCS':
            step.apply(gcp.use_gcp_secret('user-gcp-sa'))
        else:
            step.after(volume_op)
            step.add_pvolumes({"/mnt": pipeline_volume})


if __name__ == '__main__':
    # Compile the pipeline spec to a .tar.gz archive alongside this script.
    import kfp.compiler as pipeline_compiler
    pipeline_compiler.Compiler().compile(mnist_pipeline, __file__ + '.tar.gz')


================================================
FILE: ch2_seldon_examples/tiller_rbac.yaml
================================================
# ServiceAccount for Tiller, the in-cluster server component of Helm v2.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: tiller
  namespace: kube-system
---
# Bind the tiller ServiceAccount to the built-in cluster-admin role so
# Helm can install charts into any namespace.
# NOTE(review): cluster-admin is extremely broad; fine for a throwaway
# demo cluster, but scope this down for anything production-facing.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: tiller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: tiller
    namespace: kube-system

================================================
FILE: ch2_seldon_examples/train_pipeline.py
================================================
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.onprem as onprem

from string import Template
import json


@dsl.pipeline(name='Simple sci-kit KF Pipeline',
              description='A simple end to end sci-kit seldon kf pipeline')
def mnist_train_pipeline(docker_org="index.docker.io/seldonio",
                         train_container_version="0.2",
                         serve_container_version="0.1"):
    """Train an MNIST scikit-learn classifier and serve it with Seldon.

    Two stages: a training container writes the model to a freshly
    provisioned PVC, then a SeldonDeployment resource is created that
    mounts the same PVC and serves the model over REST.

    :param docker_org: registry/org prefix for both container images
    :param train_container_version: tag of the trainer image
    :param serve_container_version: tag of the Seldon runtime image
    """

    # Provision a 10G ReadWriteOnce PVC shared by training and serving.
    vop = dsl.VolumeOp(name="create_pvc",
                       resource_name="nfs-1",
                       modes=dsl.VOLUME_MODE_RWO,
                       size="10G")
    volume = vop.volume
    # Training step: the image's entrypoint writes the model under /data.
    train = dsl.ContainerOp(
        name='sk-train',
        image=
        f"{docker_org}/skmnistclassifier_trainer:{train_container_version}",
        pvolumes={"/data": volume})

    # SeldonDeployment manifest template; the serving image, tag and PVC
    # claim name are filled in below via string.Template placeholders.
    seldon_serving_json_template = Template("""
{
	"apiVersion": "machinelearning.seldon.io/v1alpha2",
	"kind": "SeldonDeployment",
	"metadata": {
		"labels": {
			"app": "seldon"
		},
		"name": "mnist-classifier"
	},
	"spec": {
		"annotations": {
			"deployment_version": "v1",
			"project_name": "MNIST Example"
		},
		"name": "mnist-classifier",
		"predictors": [
			{
				"annotations": {
					"predictor_version": "v1"
				},
				"componentSpecs": [
					{
						"spec": {
							"containers": [
								{
									"image": "$dockerreposerving:$dockertagserving",
									"imagePullPolicy": "Always",
									"name": "mnist-classifier",
									"volumeMounts": [
										{
											"mountPath": "/data",
											"name": "persistent-storage"
										}
									]
								}
							],
							"terminationGracePeriodSeconds": 1,
							"volumes": [
								{
									"name": "persistent-storage",
									"persistentVolumeClaim": {
											"claimName": "$modelpvc"
									}
								}
							]
						}
					}
				],
				"graph": {
					"children": [],
					"endpoint": {
						"type": "REST"
					},
					"name": "mnist-classifier",
					"type": "MODEL"
				},
				"name": "mnist-classifier",
				"replicas": 1
			}
		]
	}
}    
""")
    # Substitute the runtime image, its tag, and the generated PVC name
    # (exposed by VolumeOp as the "name" output) into the manifest.
    seldon_serving_json = seldon_serving_json_template.substitute({
        'dockerreposerving':
        f"{docker_org}/skmnistclassifier_runtime",
        'dockertagserving':
        str(serve_container_version),
        'modelpvc':
        vop.outputs["name"]
    })

    seldon_deployment = json.loads(seldon_serving_json)

    # Create the SeldonDeployment after training completes; the step is
    # considered done once the deployment reports state Available.
    serve = dsl.ResourceOp(
        name='serve',
        k8s_resource=seldon_deployment,
        success_condition='status.state == Available').after(train)


# If we're called directly, compile the pipeline, then create an
# experiment and launch a run of it on the cluster.
if __name__ == '__main__':
    import kfp
    import kfp.compiler as compiler

    pipeline_func = mnist_train_pipeline
    pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'
    compiler.Compiler().compile(pipeline_func, pipeline_filename)

    # Bug fix: `client` and `arguments` were previously undefined here, so
    # running this file directly raised NameError right after compiling.
    client = kfp.Client()  # connects to the in-cluster KFP endpoint
    arguments = {}  # all pipeline parameters have defaults

    experiment_name = "cheese"
    experiment = client.create_experiment(experiment_name)
    run_name = pipeline_func.__name__ + ' run'
    run_result = client.run_pipeline(experiment.id, run_name,
                                     pipeline_filename, arguments)
    print(run_result)


================================================
FILE: ch9/ctscans/DICOM Denoising Pipeline.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Collecting kfp\n",
      "  Downloading kfp-0.5.1.tar.gz (119 kB)\n",
      "\u001b[K     |████████████████████████████████| 119 kB 3.5 MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: PyYAML in /usr/local/lib/python3.6/dist-packages (from kfp) (5.3)\n",
      "Requirement already satisfied: google-cloud-storage>=1.13.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.25.0)\n",
      "Requirement already satisfied: kubernetes<12.0.0,>=8.0.0 in /usr/local/lib/python3.6/dist-packages (from kfp) (10.0.1)\n",
      "Requirement already satisfied: google-auth>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (1.11.0)\n",
      "Collecting requests_toolbelt>=0.8.0\n",
      "  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)\n",
      "\u001b[K     |████████████████████████████████| 54 kB 4.0 MB/s  eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from kfp) (1.2.2)\n",
      "Collecting kfp-server-api<0.6.0,>=0.2.5\n",
      "  Downloading kfp-server-api-0.5.0.tar.gz (39 kB)\n",
      "Requirement already satisfied: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kfp) (3.2.0)\n",
      "Collecting tabulate\n",
      "  Downloading tabulate-0.8.7-py3-none-any.whl (24 kB)\n",
      "Collecting click\n",
      "  Downloading click-7.1.2-py2.py3-none-any.whl (82 kB)\n",
      "\u001b[K     |████████████████████████████████| 82 kB 1.5 MB/s  eta 0:00:01\n",
      "\u001b[?25hCollecting Deprecated\n",
      "  Downloading Deprecated-1.2.9-py2.py3-none-any.whl (8.6 kB)\n",
      "Collecting strip-hints\n",
      "  Downloading strip-hints-0.1.9.tar.gz (30 kB)\n",
      "Requirement already satisfied: google-cloud-core<2.0dev,>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (1.3.0)\n",
      "Requirement already satisfied: google-resumable-media<0.6dev,>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-storage>=1.13.0->kfp) (0.5.0)\n",
      "Requirement already satisfied: six>=1.9.0 in /usr/lib/python3/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (1.11.0)\n",
      "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (0.57.0)\n",
      "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (2.8.1)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (2.22.0)\n",
      "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (1.3.0)\n",
      "Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (45.1.0)\n",
      "Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (1.25.8)\n",
      "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubernetes<12.0.0,>=8.0.0->kfp) (2019.11.28)\n",
      "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0)\n",
      "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (4.0.0)\n",
      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.6.1->kfp) (0.2.8)\n",
      "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (19.3.0)\n",
      "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (1.4.0)\n",
      "Requirement already satisfied: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kfp) (0.15.7)\n",
      "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from Deprecated->kfp) (1.11.2)\n",
      "Requirement already satisfied: wheel in /usr/lib/python3/dist-packages (from strip-hints->kfp) (0.30.0)\n",
      "Requirement already satisfied: google-api-core<2.0.0dev,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.16.0)\n",
      "Requirement already satisfied: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes<12.0.0,>=8.0.0->kfp) (2.6)\n",
      "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes<12.0.0,>=8.0.0->kfp) (3.0.4)\n",
      "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes<12.0.0,>=8.0.0->kfp) (3.1.0)\n",
      "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.6.1->kfp) (0.4.8)\n",
      "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kfp) (2.1.0)\n",
      "Requirement already satisfied: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (3.11.2)\n",
      "Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (2019.3)\n",
      "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from google-api-core<2.0.0dev,>=1.16.0->google-cloud-core<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp) (1.51.0)\n",
      "Building wheels for collected packages: kfp, kfp-server-api, strip-hints\n",
      "  Building wheel for kfp (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for kfp: filename=kfp-0.5.1-py3-none-any.whl size=163151 sha256=da5b540ae9834d37659146f0576997ffd8f7a7e2b305e1eb7b2a99dd4745930b\n",
      "  Stored in directory: /home/jovyan/.cache/pip/wheels/2f/26/f9/e3836cb6e6cabd63ef912304e18a852ac29cb870a4a0b85f98\n",
      "  Building wheel for kfp-server-api (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for kfp-server-api: filename=kfp_server_api-0.5.0-py3-none-any.whl size=106319 sha256=84f55948cc254c0f836dffdfd51574a828ae8a503a2ca9198acf7a27ca2aaea7\n",
      "  Stored in directory: /home/jovyan/.cache/pip/wheels/73/36/4e/bfe2efeeea4f74f04984ebe1d44136202b72191302f4760951\n",
      "  Building wheel for strip-hints (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for strip-hints: filename=strip_hints-0.1.9-py2.py3-none-any.whl size=24671 sha256=3bcfd573a91f5f4c46d23509ac3fee9a0cf351b414e00ed505a8f71d0e6a1141\n",
      "  Stored in directory: /home/jovyan/.cache/pip/wheels/21/6d/fa/7ed7c0560e1ef39ebabd5cc0241e7fca711660bae1ad752e2b\n",
      "Successfully built kfp kfp-server-api strip-hints\n",
      "Installing collected packages: requests-toolbelt, kfp-server-api, tabulate, click, Deprecated, strip-hints, kfp\n",
      "\u001b[33m  WARNING: The script tabulate is installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
      "\u001b[33m  WARNING: The script strip-hints is installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
      "\u001b[33m  WARNING: The scripts dsl-compile and kfp are installed in '/home/jovyan/.local/bin' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
      "Successfully installed Deprecated-1.2.9 click-7.1.2 kfp-0.5.1 kfp-server-api-0.5.0 requests-toolbelt-0.9.1 strip-hints-0.1.9 tabulate-0.8.7\n",
      "\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.1 is available.\n",
      "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!pip3 install kfp\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import kfp\n",
    "import kubernetes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "container_manifest = {\n",
    "    \"apiVersion\": \"sparkoperator.k8s.io/v1beta2\",\n",
    "    \"kind\": \"SparkApplication\",\n",
    "    \"metadata\": {\n",
    "        \"name\": \"spark-app\",\n",
    "        \"namespace\": \"kubeflow\"\n",
    "    },\n",
    "    \"spec\": {\n",
    "        \"type\": \"Scala\",\n",
    "        \"mode\": \"cluster\",\n",
    "        \"image\": \"docker.io/rawkintrevo/covid-basis-vectors:0.2.0\",\n",
    "        \"imagePullPolicy\": \"Always\",\n",
    "        \"hadoopConf\": {\n",
    "            \"fs.gs.project.id\": \"kubeflow-hacky-hacky\",\n",
    "            \"fs.gs.system.bucket\": \"covid-dicoms\",\n",
    "            \"fs.gs.impl\" : \"com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem\",\n",
    "            \"google.cloud.auth.service.account.enable\": \"true\",\n",
    "            \"google.cloud.auth.service.account.json.keyfile\": \"/mnt/secrets/user-gcp-sa.json\",\n",
    "        },\n",
    "        \"mainClass\": \"org.rawkintrevo.covid.App\",\n",
    "        \"mainApplicationFile\": \"local:///covid-0.1-jar-with-dependencies.jar\", # See the Dockerfile\n",
    "        \"arguments\": [\"245\", \"15\", \"1\"],\n",
    "        \"sparkVersion\": \"2.4.5\",\n",
    "        \"restartPolicy\": {\n",
    "            \"type\": \"Never\"\n",
    "        },\n",
    "        \"driver\": {\n",
    "            \"cores\": 1,\n",
    "            \"secrets\": [\n",
    "                {\"name\": \"user-gcp-sa\",\n",
    "                 \"path\": \"/mnt/secrets\",\n",
    "                 \"secretType\": \"GCPServiceAccount\"\n",
    "                 }\n",
    "            ],\n",
    "\n",
    "            \"coreLimit\": \"1200m\",\n",
    "            \"memory\": \"512m\",\n",
    "            \"labels\": {\n",
    "                \"version\": \"2.4.5\",\n",
    "            },\n",
    "            \"serviceAccount\": \"spark-operatoroperator-sa\", # also try spark-operatoroperator-sa\n",
    "        },\n",
    "        \"executor\": {\n",
    "            \"cores\": 1,\n",
    "            \"secrets\": [\n",
    "                {\"name\": \"user-gcp-sa\",\n",
    "                 \"path\": \"/mnt/secrets\",\n",
    "                 \"secretType\": \"GCPServiceAccount\"\n",
    "                 }\n",
    "            ],\n",
    "            \"instances\": 4,\n",
    "            \"memory\": \"4084m\"\n",
    "        },\n",
    "        \"labels\": {\n",
    "            \"version\": \"2.4.5\"\n",
    "        },\n",
    "\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kfp.gcp import use_gcp_secret\n",
    "@kfp.dsl.pipeline(\n",
    "    name=\"Covid DICOM Pipe v2\",\n",
    "    description=\"Create Basis Vectors for Lung Images\"\n",
    ")\n",
    "def covid_dicom_pipeline():\n",
    "    vop = kfp.dsl.VolumeOp(\n",
    "        name=\"requisition-PVC\",\n",
    "        resource_name=\"datapvc\",\n",
    "        size=\"20Gi\", #10 Gi blows up...\n",
    "        modes=kfp.dsl.VOLUME_MODE_RWO\n",
    "    )\n",
    "    step1 = kfp.dsl.ContainerOp(\n",
    "        name=\"download-dicom\",\n",
    "        image=\"rawkintrevo/download-dicom:0.0.0.4\",\n",
    "        command=[\"/run.sh\"],\n",
    "        pvolumes={\"/data\": vop.volume}\n",
    "    )\n",
    "    step2 = kfp.dsl.ContainerOp(\n",
    "        name=\"convert-dicoms-to-vectors\",\n",
    "        image=\"rawkintrevo/covid-prep-dicom:0.9.5\",\n",
    "        arguments=[\n",
    "            '--bucket_name', \"covid-dicoms\",\n",
    "        ],\n",
    "        command=[\"python\", \"/program.py\"],\n",
    "        pvolumes={\"/mnt/data\": step1.pvolume}\n",
    "    ).apply(kfp.gcp.use_gcp_secret(secret_name='user-gcp-sa'))\n",
    "    rop = kfp.dsl.ResourceOp(\n",
    "        name=\"calculate-basis-vectors\",\n",
    "        k8s_resource=container_manifest,\n",
    "        action=\"create\",\n",
    "        success_condition=\"status.applicationState.state == COMPLETED\"\n",
    "    ).after(step2)\n",
    "    pyviz = kfp.dsl.ContainerOp(\n",
    "        name=\"visualize-slice-of-dicom\",\n",
    "        image=\"rawkintrevo/visualize-dicom-output:0.0.11\",\n",
    "        command=[\"python\", \"/program.py\"],\n",
    "        arguments=[\n",
    "            '--bucket_name', \"covid-dicoms\",\n",
    "        ],\n",
    "    ).apply(kfp.gcp.use_gcp_secret(secret_name='user-gcp-sa')).after(rop)\n",
    "    \n",
    "\n",
    "kfp.compiler.Compiler().compile(covid_dicom_pipeline,\"dicom-pipeline-2.zip\")\n",
    "client = kfp.Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Experiment link <a href=\"/pipeline/#/experiments/details/a7292089-5186-4e53-b0bb-9264dfbb9775\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run link <a href=\"/pipeline/#/runs/details/0f3f3d01-f6c4-4216-8e03-396c49fa040f\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "my_experiment = client.create_experiment(name='my-experiments')\n",
    "my_run = client.run_pipeline(my_experiment.id, 'my-run1', 'dicom-pipeline-2.zip')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}


================================================
FILE: ch9/ctscans/calculate-basis-vectors/Dockerfile
================================================
FROM gcr.io/spark-operator/spark:v2.4.5-gcs-prometheus

# Ship the pre-built fat JAR containing the Spark job.
COPY target/covid-0.1-jar-with-dependencies.jar /

## Someday soon we'll live in a world where this hack is unnecessary
# https://github.com/GoogleCloudDataproc/hadoop-connectors/issues/323
# Bug fix: this was `CMD rm ...`, which only set the container's default
# command (superseded at runtime by the entrypoint/operator) and never
# deleted the stale connector jar from the image. `RUN` executes the
# removal at build time, which is what the pinned-connector hack requires.
# NOTE(review): if the base image switches to a non-root build user this
# RUN may need a USER root / USER <app> wrapper — confirm on upgrade.
RUN rm /opt/spark/jars/gcs-connector-latest-hadoop2.jar
ADD https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar $SPARK_HOME/jars

ENTRYPOINT ["/opt/entrypoint.sh"]


================================================
FILE: ch9/ctscans/calculate-basis-vectors/build-component.sh
================================================
#!/usr/bin/env bash
# Build and push the Docker image for the basis-vector Spark component.

# Fail fast: without this, a failed `cd` would build from the wrong
# directory and a failed build would still be pushed.
set -e

image_name=rawkintrevo/covid-basis-vectors # Specify the image name here
image_tag=0.2.0
full_image_name=${image_name}:${image_tag}

# Always build from this script's own directory so the Docker context
# (Dockerfile + target/ JAR) is correct regardless of the caller's cwd.
cd "$(dirname "$0")"
docker build -t "${full_image_name}" .
docker push "${full_image_name}"


================================================
FILE: ch9/ctscans/calculate-basis-vectors/pom.xml
================================================
<!-- Maven build for the CT-scan basis-vector Spark job (Scala + Mahout).
     Produces a fat JAR (jar-with-dependencies) whose main class is
     org.rawkintrevo.covid.App, consumed by the Dockerfile in this dir. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.rawkintrevo</groupId>
  <artifactId>covid</artifactId>
  <version>0.1</version>
  <inceptionYear>2020</inceptionYear>
  <properties>
    <scala.version>2.11.12</scala.version>
  </properties>

  <!-- NOTE(review): scala-tools.org has been offline for years; resolution
       presumably falls through to Maven Central — verify before relying on
       these repositories. -->
  <repositories>
    <repository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </repository>
  </repositories>

  <pluginRepositories>
    <pluginRepository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>
  </pluginRepositories>

  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.4</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.specs</groupId>
      <artifactId>specs</artifactId>
      <version>1.2.5</version>
      <scope>test</scope>
    </dependency>

    <!-- Mahout distributed linear algebra (used by App.scala for dssvd).
         NOTE(review): 14.1-SNAPSHOT is not a released artifact; it must be
         built and installed locally (or come from a snapshot repo). -->
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-core_2.11</artifactId>
      <version>14.1-SNAPSHOT</version>
    </dependency>

    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-hdfs_2.11</artifactId>
      <version>14.1-SNAPSHOT</version>
    </dependency>

    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-spark_2.11</artifactId>
      <version>14.1-SNAPSHOT</version>
    </dependency>


  </dependencies>

  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <!-- Compiles the Scala sources with the version pinned above. -->
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <args>
            <arg>-target:jvm-1.5</arg>
          </args>
        </configuration>
      </plugin>
      <!-- Eclipse/Scala-IDE project generation only; not part of the build. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <configuration>
          <downloadSources>true</downloadSources>
          <buildcommands>
            <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
          </buildcommands>
          <additionalProjectnatures>
            <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
          </additionalProjectnatures>
          <classpathContainers>
            <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
            <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
          </classpathContainers>
        </configuration>
      </plugin>
      <!-- This builds the fat JAR -->
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <archive>
            <manifest>
              <mainClass>org.rawkintrevo.covid.App</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
  <reporting>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
        </configuration>
      </plugin>
    </plugins>
  </reporting>
</project>


================================================
FILE: ch9/ctscans/calculate-basis-vectors/src/main/scala/org/rawkintrevo/covid/App.scala
================================================
package org.rawkintrevo.covid

import org.apache.mahout.math._
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.math.scalabindings.RLikeOps._
import org.apache.mahout.math.drm.RLikeDrmOps._
import org.apache.mahout.sparkbindings._
import org.apache.mahout.math.decompositions._
import org.apache.mahout.math.scalabindings.MahoutCollections._

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

import org.apache.spark.SparkFiles

/**
 * Computes basis vectors for CT-scan denoising via Mahout's distributed
 * stochastic SVD (dssvd) on Spark.
 *
 * CLI args:
 *   args(0) = k  rank of the output (number of "eigenfaces" we want)
 *   args(1) = p  oversampling parameter
 *   args(2) = q  number of additional power iterations
 *   args(3) = (optional) GCS path to the input CSV matrix;
 *             defaults to gs://covid-dicoms/s.csv for backward compatibility
 *
 * See https://mahout.apache.org/users/dim-reduction/ssvd.html
 */
object App {
  def main(args: Array[String]) {

    // Kryo serialization is required for Mahout's DRM types on Spark.
    val conf:SparkConf = new SparkConf()
      .setAppName("Calculate CT Scan Basis Vectors")
      .set("spark.kryo.referenceTracking", "false")
      .set("spark.kryo.registrator", "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator")
      .set("spark.kryoserializer.buffer", "32")
      .set("spark.kryoserializer.buffer.max" , "600m")
      .set("spark.serializer",	"org.apache.spark.serializer.KryoSerializer")

    // Create the Spark context and wrap it as a Mahout distributed context.
    val sc = new SparkContext(conf)
    implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)

    val k = args(0).toInt
    val p = args(1).toInt
    val q = args(2).toInt

    // Input matrix path is now an optional 4th argument (was a hard-coded TODO).
    val pathToMatrix =
      if (args.length > 3) args(3)
      else "gs://covid-dicoms/s.csv"

    // Each CSV line is one row vector; zipWithIndex supplies the Int row keys
    // the DRM wrapper requires.
    val voxelRDD:DrmRdd[Int]  = sc.textFile(pathToMatrix)
      .map(s => dvec( s.split(",")
      .map(f => f.toDouble)))
      .zipWithIndex
      .map(o => (o._2.toInt, o._1))

    val voxelDRM = drmWrap(voxelRDD)

    val(drmU, drmV, s) = dssvd(voxelDRM.t, k, p, q)

    // saveAsTextFile returns Unit, so its result is not bound to a val.
    // checkpoint() forces materialization before writing.
    drmV.checkpoint().rdd.saveAsTextFile("gs://covid-dicoms/drmV")
    drmU.t.checkpoint().rdd.saveAsTextFile("gs://covid-dicoms/drmU")

    // Singular values are small (length k), so one partition is enough.
    sc.parallelize(s.toArray,1).saveAsTextFile("gs://covid-dicoms/s")
    println("The job is done!")
  }
}

// $SPARK_HOME/bin/spark-submit --driver-memory 4G --executor-memory 4G --class org.rawkintrevo.covid.App *jar

================================================
FILE: ch9/ctscans/download-dicom/Dockerfile
================================================
# Component image for downloading COVID DICOM scans from GCS.
# The cloud-sdk base image already ships gsutil, which is all run.sh needs.
FROM gcr.io/google.com/cloudsdktool/cloud-sdk:latest
#
## install gsutil lightly
#RUN  apt update \
#  && apt install -y wget
#RUN wget https://storage.googleapis.com/pub/gsutil.tar.gz
#RUN tar xfz gsutil.tar.gz -C $HOME
#ENV PATH="${PATH}:$HOME/gsutil"

# The download script is the component's entry point (invoked by the pipeline).
COPY ./run.sh /run.sh


================================================
FILE: ch9/ctscans/download-dicom/build-component.sh
================================================
#!/usr/bin/env bash
# Build and push the download-dicom component image.
# Abort on the first failing command so a broken build never results in
# pushing a stale local image.
set -euo pipefail

image_name=rawkintrevo/download-dicom # Specify the image name here
image_tag=0.0.0.4
full_image_name="${image_name}:${image_tag}"

# Build relative to the directory containing this script, regardless of CWD.
cd "$(dirname "$0")"
docker build -t "${full_image_name}" .
docker push "${full_image_name}"


================================================
FILE: ch9/ctscans/download-dicom/run.sh
================================================
#!/usr/bin/env bash
set -e

# Download the COVID DICOM archive from GCS, unpack it, and stage the
# requested case's axial slices at /data/dicom for the next pipeline step.
#
# 1st arg- case number (leading zero required if < 10), defaults to case1

if [ -z "${1}" ]
then
      CASE="01"
else
      CASE="${1}"
fi



echo "Downloading DICOMs"
# If not on GCP need to download this
gsutil cp gs://covid-dicoms/covid-dicoms.tar.gz /tmp/covid-dicoms.tar.gz
tar -xzf /tmp/covid-dicoms.tar.gz -C /tmp

# NOTE(review): "case0${CASE}" with the documented two-digit arg (e.g. "01")
# expands to a three-digit directory (e.g. /tmp/case001). This only works if
# the tarball's directories are named caseNNN; if they are caseNN this is a
# bug and the path should be "case${CASE}". Verify against the archive layout.
mv "/tmp/case0${CASE}/axial" /data/dicom





================================================
FILE: ch9/ctscans/process-dicoms-into-vectors/Dockerfile
================================================
FROM pydicom/dicom:v3.6.5

# From https://github.com/HealthplusAI/python3-gdcm
RUN apt update && apt install -y python-vtk6 libvtk6-dev cmake-curses-gui swig python3-dev libpython3.7-dev
## checkinstall missing...
RUN ln -s /opt/conda/bin/* /usr/local/bin
RUN
Download .txt
gitextract_v2ir0_h2/

├── .circleci/
│   └── config.yml
├── .gitignore
├── .travis.yaml
├── LICENSE
├── README.md
├── autopep_stuff.sh
├── ch03/
│   ├── example_secret.yaml
│   ├── linux_install.sh
│   ├── mac_install.sh
│   └── minio.sh
├── ch04/
│   ├── code/
│   │   ├── ControlStructures.ipynb
│   │   ├── ControlStructures.py
│   │   ├── Lightweight Pipeline.ipynb
│   │   ├── Lightweight Pipeline.py
│   │   ├── RecommenderPipeline.ipynb
│   │   ├── RecommenderPipeline.py
│   │   └── download_components.sh
│   └── install/
│       ├── deployment.yaml
│       └── virtualservice.yaml
├── ch06/
│   ├── MLflow.ipynb
│   ├── MLflow.py
│   ├── Metadata.ipynb
│   ├── Metadata.py
│   ├── docker/
│   │   ├── Dockerfile
│   │   ├── build.sh
│   │   └── run.sh
│   └── install/
│       └── mlflowchart/
│           ├── .helmignore
│           ├── Chart.yaml
│           ├── templates/
│           │   ├── NOTES.txt
│           │   ├── _helpers.tpl
│           │   └── mlflow.yaml
│           └── values.yaml
├── ch10/
│   ├── experiment.yaml
│   ├── hptuning.py
│   └── random.yaml
├── ch2/
│   ├── Dockerfile
│   ├── build-and-push.sh
│   └── query-endpoint.py
├── ch2_seldon_examples/
│   ├── pipeline_role.yaml
│   ├── pipeline_rolebinding.yaml
│   ├── pv-claim.yaml
│   ├── pv-volume.yaml
│   ├── request_example.ipynb
│   ├── run_example.sh
│   ├── setup_example.sh
│   ├── tf_mnist_no_seldon_pipeline.py
│   ├── tiller_rbac.yaml
│   └── train_pipeline.py
├── ch9/
│   └── ctscans/
│       ├── DICOM Denoising Pipeline.ipynb
│       ├── calculate-basis-vectors/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   ├── pom.xml
│       │   └── src/
│       │       └── main/
│       │           └── scala/
│       │               └── org/
│       │                   └── rawkintrevo/
│       │                       └── covid/
│       │                           └── App.scala
│       ├── download-dicom/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   └── run.sh
│       ├── process-dicoms-into-vectors/
│       │   ├── Dockerfile
│       │   ├── build-component.sh
│       │   ├── data/
│       │   │   └── s.150.csv
│       │   ├── process-dicoms-into-vectors.yaml
│       │   └── src/
│       │       └── program.py
│       └── visualize-basis-vectors/
│           ├── Dockerfile
│           ├── build-component.sh
│           └── src/
│               └── program.py
├── ci.sh
├── convert_notebooks.sh
├── data-extraction/
│   ├── README.md
│   ├── github_comments_query.bsql
│   ├── github_issues_query.bsql
│   ├── iot/
│   │   ├── basic.yaml
│   │   └── build.sh
│   ├── python-notebook/
│   │   ├── AddSpamassassinDockerfile
│   │   ├── MailingListDataPrep.ipynb
│   │   ├── MailingListDataPrep.py
│   │   └── RunNBDockerfile
│   ├── python-spark/
│   │   ├── Dockerfile
│   │   ├── LaunchSparkJobs.ipynb
│   │   ├── LaunchSparkJobs.py
│   │   ├── fake_job.py
│   │   └── requirements.txt
│   ├── python-spark-notebook/
│   │   ├── AddGCSDockerfile
│   │   ├── AddPython3.6Dockerfile
│   │   ├── Dockerfile
│   │   ├── SparkMailingListForKF.ipynb
│   │   ├── SparkMailingListForKF.py
│   │   ├── build.sh
│   │   ├── dr.yaml
│   │   ├── no-saprk-tls.yaml
│   │   ├── spark-driver-service.yaml
│   │   └── virt_service.yaml
│   ├── spark-hello-world/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── hello_world_pipeline.py
│   │   ├── lr_demo/
│   │   │   ├── .gitignore
│   │   │   ├── .travis.yml
│   │   │   ├── README.md
│   │   │   ├── build.sbt
│   │   │   ├── project/
│   │   │   │   ├── build.properties
│   │   │   │   └── plugins.sbt
│   │   │   ├── sample.csv
│   │   │   ├── sbt/
│   │   │   │   └── sbt
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   └── scala/
│   │   │       │       └── com/
│   │   │       │           └── introtomlwithkubeflow/
│   │   │       │               └── spark/
│   │   │       │                   └── demo/
│   │   │       │                       └── lr/
│   │   │       │                           ├── TrainingApp.scala
│   │   │       │                           └── TrainingPipeline.scala
│   │   │       └── test/
│   │   │           └── scala/
│   │   │               └── com/
│   │   │                   └── introtomlwithkubeflow/
│   │   │                       └── spark/
│   │   │                           └── demo/
│   │   │                               └── lr/
│   │   │                                   └── TrainingPipelineTest.scala
│   │   ├── setup.sh
│   │   ├── spark-pi-min.yaml
│   │   └── spark-pi.yaml
│   ├── stack_overflow_questions.bsql
│   └── tfx/
│       ├── TFDV.ipynb
│       ├── TFDV.py
│       ├── install_tfx.sh
│       ├── requirements.txt
│       └── run_on_dataflow_ex.py
├── dev-setup/
│   ├── install-argo.sh
│   ├── install-kf-pipeline-sdk.sh
│   ├── install-kf.sh
│   ├── install-kubectl.sh
│   ├── install-kustomize.sh
│   ├── install-microk8s.sh
│   └── jsonnet.sh
├── feature-prep/
│   ├── README.md
│   ├── spark/
│   │   ├── SparkMailingListFeaturePrep.ipynb
│   │   └── SparkMailingListFeaturePrep.py
│   └── tft/
│       ├── requirements.txt
│       └── transform.py
├── gcp-setup/
│   ├── cloudshell_scrip.sh
│   └── setup-gcp.sh
├── kfctl_gcp_iap.v1.0.1.yaml
├── pipelines/
│   ├── ControlStructures.ipynb
│   ├── Lightweight Pipeline.ipynb
│   ├── RecommenderPipeline.ipynb
│   └── download_components.sh
├── recommender/
│   ├── Dockerfile
│   ├── Recommender_Kubeflow.ipynb
│   ├── Recommender_Kubeflow.py
│   ├── docker/
│   │   ├── Dockerfile
│   │   └── build.sh
│   └── tfservingchart/
│       ├── .helmignore
│       ├── Chart.yaml
│       ├── templates/
│       │   ├── NOTES.txt
│       │   ├── _helpers.tpl
│       │   ├── minioaccess.yaml
│       │   ├── tfserving.yaml
│       │   └── tfserving1.yaml
│       └── values.yaml
├── runthrough.sh
└── scikitLearn/
    └── python/
        └── IncomePrediction.ipynb
Download .txt
SYMBOL INDEX (60 symbols across 17 files)

FILE: ch04/code/ControlStructures.py
  function get_random_int_op (line 22) | def get_random_int_op(minimum: int, maximum: int) -> int:
  function process_small_op (line 31) | def process_small_op(data: int):
  function process_medium_op (line 38) | def process_medium_op(data: int):
  function process_large_op (line 45) | def process_large_op(data: int):
  function conditional_pipeline (line 58) | def conditional_pipeline():

FILE: ch04/code/Lightweight Pipeline.py
  function add (line 22) | def add(a: float, b: float) -> float:
  function my_divmod (line 40) | def my_divmod(
  function calc_pipeline (line 80) | def calc_pipeline(

FILE: ch04/code/RecommenderPipeline.py
  function recommender_pipeline (line 46) | def recommender_pipeline():

FILE: ch06/MLflow.py
  function evaluation_model (line 120) | def evaluation_model(y_test, y_pred):
  function train_knnmodel (line 142) | def train_knnmodel(parameters, inputs, tags, log=False):
  function train_mlpmodel (line 211) | def train_mlpmodel(parameters, inputs, tags, log=False):
  class PTG (line 286) | class PTG:
    method __init__ (line 287) | def __init__(self, thresholds_x0, thresholds_a, thresholds_b):
    method get_ptgmodel (line 292) | def get_ptgmodel(self, x, a, b, x0):
    method fit (line 296) | def fit(self, dfx, y):
    method predict (line 317) | def predict(self, dfx):
  function train_ptgmodel (line 328) | def train_ptgmodel(parameters, inputs, tags, log=False):

FILE: ch2/query-endpoint.py
  function download_mnist (line 25) | def download_mnist():
  function gen_image (line 29) | def gen_image(arr):

FILE: ch2_seldon_examples/tf_mnist_no_seldon_pipeline.py
  function mnist_pipeline (line 29) | def mnist_pipeline(gcs_bucket=None,

FILE: ch2_seldon_examples/train_pipeline.py
  function mnist_train_pipeline (line 11) | def mnist_train_pipeline(docker_org="index.docker.io/seldonio",

FILE: ch9/ctscans/process-dicoms-into-vectors/src/program.py
  function create_3d_matrix (line 25) | def create_3d_matrix(path):
  function upload_blob (line 53) | def upload_blob(bucket_name, source_file_name, destination_blob_name):

FILE: ch9/ctscans/visualize-basis-vectors/src/program.py
  function read_mahout_drm (line 19) | def read_mahout_drm(path):
  function plot_3d_matrix (line 35) | def plot_3d_matrix(img3d, img_shape, ax_aspect, sag_aspect, cor_aspect):
  function plot_2_3d_matrices (line 51) | def plot_2_3d_matrices(img1, img2, aspect, slice, cmap):
  function upload_blob (line 61) | def upload_blob(bucket_name, source_file_name, destination_blob_name):
  function download_folder (line 77) | def download_folder(bucket_name='your-bucket-name',

FILE: data-extraction/python-notebook/MailingListDataPrep.py
  function scrapeMailArchives (line 36) | def scrapeMailArchives(mailingList: str, year: int, month: int):
  function extract_links (line 69) | def extract_links(body):
  function extract_domains (line 75) | def extract_domains(links):
  function contains_python_stack_trace (line 91) | def contains_python_stack_trace(body):
  function contains_probably_java_stack_trace (line 95) | def contains_probably_java_stack_trace(body):
  function contains_exception_in_task (line 106) | def contains_exception_in_task(body):
  function makeDomainsAList (line 147) | def makeDomainsAList(d):
  function download_data (line 189) | def download_data(year: int) -> str:
  function download_tld_data (line 252) | def download_tld_data() -> str:
  function clean_data (line 278) | def clean_data(input_path: str) -> str:
  function prepare_features (line 313) | def prepare_features(input_path: str, tld_info_path: str):
  function my_pipeline_mini (line 430) | def my_pipeline_mini(year: int):
  function my_pipeline2 (line 466) | def my_pipeline2(year: int):
  function train_func (line 535) | def train_func(input_path: String):

FILE: data-extraction/python-spark-notebook/SparkMailingListForKF.py
  function download_emails (line 88) | def download_emails(date):
  function extract_date_from_email_datefield (line 165) | def extract_date_from_email_datefield(datefield):
  function is_ok (line 202) | def is_ok(post):

FILE: data-extraction/python-spark/LaunchSparkJobs.py
  function local_pipeline (line 61) | def local_pipeline():

FILE: data-extraction/spark-hello-world/hello_world_pipeline.py
  function spark_hello_world_pipeline (line 11) | def spark_hello_world_pipeline(jar_location="gcs://....", tf_job_image="...

FILE: data-extraction/tfx/TFDV.py
  function pipeline_with_dl (line 42) | def pipeline_with_dl():
  function tfdv_pipeline (line 69) | def tfdv_pipeline():
  function tfx_pipeline (line 145) | def tfx_pipeline():

FILE: feature-prep/spark/SparkMailingListFeaturePrep.py
  function extract_links (line 133) | def extract_links(body):
  function extract_domains (line 140) | def extract_domains(links):
  function contains_python_stack_trace (line 156) | def contains_python_stack_trace(body):
  function contains_probably_java_stack_trace (line 160) | def contains_probably_java_stack_trace(body):
  function contains_exception_in_task (line 171) | def contains_exception_in_task(body):

FILE: feature-prep/tft/transform.py
  function preprocessing_fn (line 10) | def preprocessing_fn(inputs):

FILE: recommender/Recommender_Kubeflow.py
  class DeepCollaborativeFiltering (line 233) | class DeepCollaborativeFiltering(Model):
    method __init__ (line 234) | def __init__(self, n_customers, n_products, n_factors, p_dropout=0.2):
    method rate (line 256) | def rate(self, customer_idxs, product_idxs):
Condensed preview — 147 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (563K chars).
[
  {
    "path": ".circleci/config.yml",
    "chars": 412,
    "preview": "version: 2\n\napt-run:  &apt-install\n  name: Install apt packages\n  command: |\n    sudo apt-get -qq update\n    sudo apt-ge"
  },
  {
    "path": ".gitignore",
    "chars": 1261,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
  },
  {
    "path": ".travis.yaml",
    "chars": 95,
    "preview": "language: generic\nsudo: true\naddons:\n  apt:\n    packages:\n     - shellcheck\nscript:\n  - ./ci.sh"
  },
  {
    "path": "LICENSE",
    "chars": 11357,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "README.md",
    "chars": 85,
    "preview": "# intro-to-ml-with-kubeflow-examples\nExamples for the Intro to ML with Kubeflow book\n"
  },
  {
    "path": "autopep_stuff.sh",
    "chars": 448,
    "preview": "#!/bin/bash\n# autopep8 a bunch of things that we can\nautopep8 -i -r ./ \\\n\t --select E101,E202,E201,E203,E211,E221,E222,E"
  },
  {
    "path": "ch03/example_secret.yaml",
    "chars": 168,
    "preview": "apiVersion: v1\nkind: Secret\nmetadata:\n  name: minioaccess\n  namespace: mynamespace\ndata:\n  AWS_ACCESS_KEY_ID: xxxxxxxxxx"
  },
  {
    "path": "ch03/linux_install.sh",
    "chars": 139,
    "preview": "#!/bin/bash\n#tag::installMCLinux[]\npushd ~/bin\nwget https://dl.min.io/client/mc/release/linux-amd64/mc\nchmod a+x mc\n#end"
  },
  {
    "path": "ch03/mac_install.sh",
    "chars": 86,
    "preview": "#!/bin/bash\n#tag::installMCMac[]\nbrew install minio/stable/minio\n#end::installMCMac[]\n"
  },
  {
    "path": "ch03/minio.sh",
    "chars": 600,
    "preview": "#!/bin/bash\nset -ex\n\n# Minio runs on port 9000 (both UI and service) so expose locally to use cli or UI\n#tag::fwdMinio[]"
  },
  {
    "path": "ch04/code/ControlStructures.ipynb",
    "chars": 11142,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Simple Control structure\\n\",\n    "
  },
  {
    "path": "ch04/code/ControlStructures.py",
    "chars": 1597,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# # Simple Control structure\n#\n# Shows how to use conditional execution\n\n# In[1]:"
  },
  {
    "path": "ch04/code/Lightweight Pipeline.ipynb",
    "chars": 13249,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Setup\"\n   ]\n  },\n  {\n   \"cell_typ"
  },
  {
    "path": "ch04/code/Lightweight Pipeline.py",
    "chars": 2703,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# # Setup\n\n# In[1]:\n\nget_ipython().system('pip install kfp --upgrade --user')\n\nim"
  },
  {
    "path": "ch04/code/RecommenderPipeline.ipynb",
    "chars": 16063,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Kubeflow pipeline\\n\",\n    \"This i"
  },
  {
    "path": "ch04/code/RecommenderPipeline.py",
    "chars": 3772,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# # Kubeflow pipeline\n# This is a fairly simple pipeline, containing sequential s"
  },
  {
    "path": "ch04/code/download_components.sh",
    "chars": 150,
    "preview": "#!/bin/bash\n#tag::dlPipelineRelease[]\nwget https://github.com/kubeflow/pipelines/archive/0.2.5.tar.gz\ntar -xvf 0.2.5.tar"
  },
  {
    "path": "ch04/install/deployment.yaml",
    "chars": 2518,
    "preview": "apiVersion: extensions/v1beta1\nkind: Deployment\nmetadata:\n  labels:\n    app: argo-ui\n    app.kubernetes.io/component: ar"
  },
  {
    "path": "ch04/install/virtualservice.yaml",
    "chars": 394,
    "preview": "apiVersion: networking.istio.io/v1alpha3\nkind: VirtualService\nmetadata:\n  name: argo-ui\n  namespace: kubeflow\nspec:\n  ga"
  },
  {
    "path": "ch06/MLflow.ipynb",
    "chars": 41579,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# mlflow-energyforecast\\n\",\n    \"\\n"
  },
  {
    "path": "ch06/MLflow.py",
    "chars": 12024,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# # mlflow-energyforecast\n#\n# This is a showcase for ML Flow capabilities, based "
  },
  {
    "path": "ch06/Metadata.ipynb",
    "chars": 11547,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Installation and imports\"\n   ]\n  "
  },
  {
    "path": "ch06/Metadata.py",
    "chars": 3019,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# # Installation and imports\n\n# In[1]:\n\nget_ipython().system('pip install kfmd --"
  },
  {
    "path": "ch06/docker/Dockerfile",
    "chars": 461,
    "preview": "# from https://github.com/flmu/mlflow-tracking-server\n\nFROM python:3.7\n\nRUN pip3 install --upgrade pip && \\\n    pip3 ins"
  },
  {
    "path": "ch06/docker/build.sh",
    "chars": 75,
    "preview": "#!/bin/bash\n\nimg='lightbend/mlflow'\ntag='0.1'\ndocker build -t $img:$tag .\n\n"
  },
  {
    "path": "ch06/docker/run.sh",
    "chars": 489,
    "preview": "#!/bin/sh\n\nset -e\n\nif [ -z \"${AWS_BUCKET}\" ]; then\n  echo >&2 \"AWS_BUCKET must be set\"\n  exit 1\nfi\n\nif [ -z \"${AWS_ACCES"
  },
  {
    "path": "ch06/install/mlflowchart/.helmignore",
    "chars": 333,
    "preview": "# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation"
  },
  {
    "path": "ch06/install/mlflowchart/Chart.yaml",
    "chars": 129,
    "preview": "apiVersion: v1\nappVersion: 0.1\ndescription: MLFlow\nmaintainers:\n- name: Boris Lublinsky\nname: MLFLOW tracking server\nver"
  },
  {
    "path": "ch06/install/mlflowchart/templates/NOTES.txt",
    "chars": 37,
    "preview": "ML Flow tracking server is installed\n"
  },
  {
    "path": "ch06/install/mlflowchart/templates/_helpers.tpl",
    "chars": 550,
    "preview": "{{/* vim: set filetype=mustache: */}}\n{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"modelserverchart.name\" -}}\n{{-"
  },
  {
    "path": "ch06/install/mlflowchart/templates/mlflow.yaml",
    "chars": 1779,
    "preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  namespace: kubeflow\n  name: mlflowserver\n  labels:\n    app: mlflowserve"
  },
  {
    "path": "ch06/install/mlflowchart/values.yaml",
    "chars": 120,
    "preview": "# application name is a namespace\n# docker images\nimage:\n  server: lightbend/mlflow\n  pullPolicy: Always\n  version: 0.1\n"
  },
  {
    "path": "ch10/experiment.yaml",
    "chars": 2871,
    "preview": "Name:         random-example\nNamespace:    kubeflow\nLabels:       controller-tools.k8s.io=1.0\nAnnotations:  <none>\nAPI V"
  },
  {
    "path": "ch10/hptuning.py",
    "chars": 303,
    "preview": "# Initialize search space\n# Initialize model\nwhile not objective_reached and not bugdget_exhausted:\n    # Obtain new hyp"
  },
  {
    "path": "ch10/random.yaml",
    "chars": 1524,
    "preview": "apiVersion: \"kubeflow.org/v1alpha3\"\nkind: Experiment\nmetadata:\n  namespace: kubeflow\n  labels:\n    controller-tools.k8s."
  },
  {
    "path": "ch2/Dockerfile",
    "chars": 70,
    "preview": "FROM gcr.io/kubeflow-images-public/tensorflow-2.1.0-notebook-cpu:1.0.0"
  },
  {
    "path": "ch2/build-and-push.sh",
    "chars": 168,
    "preview": "#!/bin/bash\n#tag::buildandpush[]\nIMAGE=\"${CONTAINER_REGISTRY}/kubeflow/test:v1\"\ndocker build  -t \"${IMAGE}\" -f Dockerfil"
  },
  {
    "path": "ch2/query-endpoint.py",
    "chars": 1784,
    "preview": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE f"
  },
  {
    "path": "ch2_seldon_examples/pipeline_role.yaml",
    "chars": 208,
    "preview": "apiVersion: rbac.authorization.k8s.io/v1\nkind: Role\nmetadata:\n  namespace: kubeflow\n  name: pipeline-runner\nrules:\n- api"
  },
  {
    "path": "ch2_seldon_examples/pipeline_rolebinding.yaml",
    "chars": 278,
    "preview": "apiVersion: rbac.authorization.k8s.io/v1\nkind: RoleBinding\nmetadata:\n  name: pipeline-runner\n  namespace: kubeflow\nsubje"
  },
  {
    "path": "ch2_seldon_examples/pv-claim.yaml",
    "chars": 194,
    "preview": "kind: PersistentVolumeClaim\r\napiVersion: v1\r\nmetadata:\r\n  name: \"nfs-1\"\r\nspec:\r\n  storageClassName: manual\r\n  accessMode"
  },
  {
    "path": "ch2_seldon_examples/pv-volume.yaml",
    "chars": 241,
    "preview": "kind: PersistentVolume\r\napiVersion: v1\r\nmetadata:\r\n  name: task-pv-volume\r\n  labels:\r\n    type: local\r\nspec:\r\n  storageC"
  },
  {
    "path": "ch2_seldon_examples/request_example.ipynb",
    "chars": 13000,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\":"
  },
  {
    "path": "ch2_seldon_examples/run_example.sh",
    "chars": 911,
    "preview": "#!/bin/bash\n#tag::buildPipeline[]\ndsl-compile --py train_pipeline.py --output job.yaml\n#end::buildPipeline[]\n#tag::conne"
  },
  {
    "path": "ch2_seldon_examples/setup_example.sh",
    "chars": 1862,
    "preview": "#!/bin/bash\n\nset -ex\n\necho \"Setting up example\"\n\nunset ch2_example_path\nch2_example_path=\"$( cd \"$( dirname \"${BASH_SOUR"
  },
  {
    "path": "ch2_seldon_examples/tf_mnist_no_seldon_pipeline.py",
    "chars": 3113,
    "preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "ch2_seldon_examples/tiller_rbac.yaml",
    "chars": 348,
    "preview": "apiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: tiller\n  namespace: kube-system\n---\napiVersion: rbac.authorization"
  },
  {
    "path": "ch2_seldon_examples/train_pipeline.py",
    "chars": 3117,
    "preview": "import kfp.dsl as dsl\nimport kfp.gcp as gcp\nimport kfp.onprem as onprem\n\nfrom string import Template\nimport json\n\n\n@dsl."
  },
  {
    "path": "ch9/ctscans/DICOM Denoising Pipeline.ipynb",
    "chars": 15394,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\":"
  },
  {
    "path": "ch9/ctscans/calculate-basis-vectors/Dockerfile",
    "chars": 436,
    "preview": "FROM gcr.io/spark-operator/spark:v2.4.5-gcs-prometheus\n\nCOPY target/covid-0.1-jar-with-dependencies.jar /\n\n## Someday so"
  },
  {
    "path": "ch9/ctscans/calculate-basis-vectors/build-component.sh",
    "chars": 245,
    "preview": "#!/usr/bin/env bash\n\nimage_name=rawkintrevo/covid-basis-vectors # Specify the image name here\nimage_tag=0.2.0\nfull_image"
  },
  {
    "path": "ch9/ctscans/calculate-basis-vectors/pom.xml",
    "chars": 4325,
    "preview": "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocat"
  },
  {
    "path": "ch9/ctscans/calculate-basis-vectors/src/main/scala/org/rawkintrevo/covid/App.scala",
    "chars": 2222,
    "preview": "package org.rawkintrevo.covid\n\nimport org.apache.mahout.math._\nimport org.apache.mahout.math.scalabindings._\nimport org."
  },
  {
    "path": "ch9/ctscans/download-dicom/Dockerfile",
    "chars": 277,
    "preview": "FROM gcr.io/google.com/cloudsdktool/cloud-sdk:latest\n#\n## install gsutil lightly\n#RUN  apt update \\\n#  && apt install -y"
  },
  {
    "path": "ch9/ctscans/download-dicom/build-component.sh",
    "chars": 242,
    "preview": "#!/usr/bin/env bash\n\nimage_name=rawkintrevo/download-dicom # Specify the image name here\nimage_tag=0.0.0.4\nfull_image_na"
  },
  {
    "path": "ch9/ctscans/download-dicom/run.sh",
    "chars": 394,
    "preview": "#!/usr/bin/env bash\nset -e\n\n# 1st arg- case number (leading zero required if < 10), defaults to case1\n\nif [ -z \"${1}\" ]\n"
  },
  {
    "path": "ch9/ctscans/process-dicoms-into-vectors/Dockerfile",
    "chars": 1431,
    "preview": "FROM pydicom/dicom:v3.6.5\n\n# From https://github.com/HealthplusAI/python3-gdcm\nRUN apt update && apt install -y python-v"
  },
  {
    "path": "ch9/ctscans/process-dicoms-into-vectors/build-component.sh",
    "chars": 242,
    "preview": "#!/usr/bin/env bash\n\nimage_name=rawkintrevo/covid-prep-dicom # Specify the image name here\nimage_tag=0.9.5\nfull_image_na"
  },
  {
    "path": "ch9/ctscans/process-dicoms-into-vectors/data/s.150.csv",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "ch9/ctscans/process-dicoms-into-vectors/process-dicoms-into-vectors.yaml",
    "chars": 413,
    "preview": "name: Process DICOMs into Vectors\ndescription: Take a number of COVID DICOMs - output a list of vectors for DS-SVD.\ninpu"
  },
  {
    "path": "ch9/ctscans/process-dicoms-into-vectors/src/program.py",
    "chars": 2319,
    "preview": "from os import listdir\nimport numpy as np\nimport pydicom\n\nimport argparse\nfrom google.cloud import storage\n\nparser = arg"
  },
  {
    "path": "ch9/ctscans/visualize-basis-vectors/Dockerfile",
    "chars": 171,
    "preview": "FROM python:3-buster\n\nRUN pip install numpy\nRUN pip install matplotlib\nRUN pip install google-cloud-storage\nCOPY src/pro"
  },
  {
    "path": "ch9/ctscans/visualize-basis-vectors/build-component.sh",
    "chars": 249,
    "preview": "#!/usr/bin/env bash\n\nimage_name=rawkintrevo/visualize-dicom-output # Specify the image name here\nimage_tag=0.0.11\nfull_i"
  },
  {
    "path": "ch9/ctscans/visualize-basis-vectors/src/program.py",
    "chars": 4333,
    "preview": "from ast import literal_eval\n\nfrom os import listdir\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nimport argpars"
  },
  {
    "path": "ci.sh",
    "chars": 613,
    "preview": "#!/bin/bash\n\nset -ex\n\n# Check all the shell scripts\nfind ./ -iregex '^.+\\.sh$' -type f -print0 | \\\n  xargs -0 shellcheck"
  },
  {
    "path": "convert_notebooks.sh",
    "chars": 95,
    "preview": "#!/bin/bash\nfind . -name \"*ipynb\" |grep -v venv | xargs -d '\\n' ipython3 nbconvert --to script\n"
  },
  {
    "path": "data-extraction/README.md",
    "chars": 503,
    "preview": "## Data Extraction\n\nTo successfully construct a machine learning pipeline we need to collect the data we are going to tr"
  },
  {
    "path": "data-extraction/github_comments_query.bsql",
    "chars": 988,
    "preview": "SELECT pull_request_url,\n ANY_VALUE(pull_patch_url) as pull_patch_url,\n ARRAY_AGG(comment_position) as comments_position"
  },
  {
    "path": "data-extraction/github_issues_query.bsql",
    "chars": 236,
    "preview": "SELECT repo.name, JSON_EXTRACT(payload, '$.issue.url') \nAS url FROM (\n  SELECT *, JSON_EXTRACT(payload, '$.action') AS a"
  },
  {
    "path": "data-extraction/iot/basic.yaml",
    "chars": 564,
    "preview": "apiVersion: batch/v1\nkind: Job\nmetadata:\n  name: iot-data-extraction\n  namespace: kubeflow\nspec:\n  template:\n    spec:\n "
  },
  {
    "path": "data-extraction/iot/build.sh",
    "chars": 331,
    "preview": "#!/bin/bash\n\nCONTAINER_REGISTRY=\"gcr.io/${PROJECT_NAME}\"\n#tag::buildandpush[]\nTARGET=\"${CONTAINER_REGISTRY}/kf-steps/iot"
  },
  {
    "path": "data-extraction/python-notebook/AddSpamassassinDockerfile",
    "chars": 283,
    "preview": "ARG base\nFROM $base\n# Run as root for updates\nUSER root\n# Install Spamassassin\nRUN apt-get update && \\\n    apt-get insta"
  },
  {
    "path": "data-extraction/python-notebook/MailingListDataPrep.ipynb",
    "chars": 27981,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Here we can install some packages o"
  },
  {
    "path": "data-extraction/python-notebook/MailingListDataPrep.py",
    "chars": 20347,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# Here we can install some packages our notebook needs. We can also install them "
  },
  {
    "path": "data-extraction/python-notebook/RunNBDockerfile",
    "chars": 582,
    "preview": "# Since we used Jupyter notebooks to do the first pass extraction, we can try directly use that notebook with\n# Kubeflow"
  },
  {
    "path": "data-extraction/python-spark/Dockerfile",
    "chars": 289,
    "preview": "# Use the spark operator image as base\nFROM gcr.io/spark-operator/spark-py:v2.4.5\n# Install Python requirements\nCOPY req"
  },
  {
    "path": "data-extraction/python-spark/LaunchSparkJobs.ipynb",
    "chars": 3503,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": "
  },
  {
    "path": "data-extraction/python-spark/LaunchSparkJobs.py",
    "chars": 1859,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# In[ ]:\n\nget_ipython().system('pip3 install --upgrade --user kfp')\n\n# In[ ]:\n\nim"
  },
  {
    "path": "data-extraction/python-spark/fake_job.py",
    "chars": 333,
    "preview": "# Yes we need both these imports\n#tag::imports[]\nfrom pyspark.sql import SparkSession\nfrom pyspark.sql.functions import "
  },
  {
    "path": "data-extraction/python-spark/requirements.txt",
    "chars": 7,
    "preview": "pandas\n"
  },
  {
    "path": "data-extraction/python-spark-notebook/AddGCSDockerfile",
    "chars": 822,
    "preview": "ARG base\nFROM $base\n\n# Set an enviroment variable for where we are going to put spark\nENV SPARK_HOME /opt/spark\n\n# Run a"
  },
  {
    "path": "data-extraction/python-spark-notebook/AddPython3.6Dockerfile",
    "chars": 776,
    "preview": "ARG base\nFROM $base\n\nUSER root\n\n# Install libraries we need to build Python 3.6\nRUN apt-get update && \\\n    DEBIAN_FRONT"
  },
  {
    "path": "data-extraction/python-spark-notebook/Dockerfile",
    "chars": 1808,
    "preview": "#tag::include[]\n# See https://www.kubeflow.org/docs/notebooks/custom-notebook/\nARG base\nFROM $base\nARG sparkversion\nARG "
  },
  {
    "path": "data-extraction/python-spark-notebook/SparkMailingListForKF.ipynb",
    "chars": 11599,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": "
  },
  {
    "path": "data-extraction/python-spark-notebook/SparkMailingListForKF.py",
    "chars": 6921,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# In[ ]:\n\n# Yes we need both these imports\nfrom pyspark.sql import SparkSession\nf"
  },
  {
    "path": "data-extraction/python-spark-notebook/build.sh",
    "chars": 2276,
    "preview": "#!/bin/bash\n# Build a notebook with Spark 3\n# Note when Spark 3 is fully released we can use gcr.io/spark-operator/spark"
  },
  {
    "path": "data-extraction/python-spark-notebook/dr.yaml",
    "chars": 206,
    "preview": "apiVersion: networking.istio.io/v1alpha3\nkind: DestinationRule\nmetadata:\n  name: default\n  namespace: kubeflow-programme"
  },
  {
    "path": "data-extraction/python-spark-notebook/no-saprk-tls.yaml",
    "chars": 143,
    "preview": " apiVersion: \"authentication.istio.io/v1alpha1\"\n kind: \"Policy\"\n metadata:\n   name: spark-no-tls\n spec:\n   targets:\n   -"
  },
  {
    "path": "data-extraction/python-spark-notebook/spark-driver-service.yaml",
    "chars": 298,
    "preview": "apiVersion: v1\nkind: Service\nmetadata:\n  name: spark-driver\n  namespace: kubeflow-programmerboo\nspec:\n  selector:\n    no"
  },
  {
    "path": "data-extraction/python-spark-notebook/virt_service.yaml",
    "chars": 957,
    "preview": "apiVersion: networking.istio.io/v1alpha3\nkind: VirtualService\nmetadata:\n  creationTimestamp: \"2019-10-14T20:09:50Z\"\n  ge"
  },
  {
    "path": "data-extraction/spark-hello-world/Dockerfile",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "data-extraction/spark-hello-world/README.md",
    "chars": 224,
    "preview": "This directory will walk you through running a Spark Hello world example with kubeflow.\nIt (currently) uses the master b"
  },
  {
    "path": "data-extraction/spark-hello-world/hello_world_pipeline.py",
    "chars": 1008,
    "preview": "import kfp.dsl as dsl\nimport kfp.gcp as gcp\nimport kfp.onprem as onprem\n\nfrom string import Template\nimport json\n\n\n@dsl."
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/.gitignore",
    "chars": 1489,
    "preview": "*.class\n*.log\nbuild.sbt_back\n\n# sbt specific\ndist/*\ntarget/\nlib_managed/\nsrc_managed/\nproject/boot/\nproject/plugins/proj"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/.travis.yml",
    "chars": 301,
    "preview": "language: scala\n\n# These directories are cached to S3 at the end of the build\ncache:\n  directories:\n    - $HOME/.ivy2/ca"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/README.md",
    "chars": 38,
    "preview": "A simple, bad, LR example with Spark.\n"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/build.sbt",
    "chars": 4541,
    "preview": "val sparkVersion = \"2.3.1\"\n\nlazy val root = (project in file(\".\")).\n\n  settings(\n    inThisBuild(List(\n      organizatio"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/project/build.properties",
    "chars": 18,
    "preview": "sbt.version=1.2.8\n"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/project/plugins.sbt",
    "chars": 369,
    "preview": "addSbtPlugin(\"org.scalastyle\" %% \"scalastyle-sbt-plugin\" % \"1.0.0\")\n\nresolvers += \"sonatype-releases\" at \"https://oss.so"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/sample.csv",
    "chars": 40,
    "preview": "e1,e2,label\n1.0, 0.0, 1.0\n2.0, 2.1, 2.0\n"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/sbt/sbt",
    "chars": 2150,
    "preview": "#!/bin/bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  S"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/src/main/scala/com/introtomlwithkubeflow/spark/demo/lr/TrainingApp.scala",
    "chars": 608,
    "preview": "package com.introtomlwithkubeflow.spark.demo.lr\n\nimport org.apache.spark.{SparkConf, SparkContext}\n\n\n/**\n  * Use this wh"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/src/main/scala/com/introtomlwithkubeflow/spark/demo/lr/TrainingPipeline.scala",
    "chars": 1901,
    "preview": "package com.introtomlwithkubeflow.spark.demo.lr\n\nimport java.nio.file.{Files, Paths}\n\n\nimport ml.combust.bundle.BundleFi"
  },
  {
    "path": "data-extraction/spark-hello-world/lr_demo/src/test/scala/com/introtomlwithkubeflow/spark/demo/lr/TrainingPipelineTest.scala",
    "chars": 1027,
    "preview": "package com.introtomlwithkubeflow.spark.demo.lr\n\n/**\n * A simple test for the training pipeline\n */\n\nimport com.holdenka"
  },
  {
    "path": "data-extraction/spark-hello-world/setup.sh",
    "chars": 1641,
    "preview": "#!/bin/bash\nset -ex\n\nSPARK_DEMO_DIR=${SPARK_DEMO_DIR:=~/spark_demo_3}\nSPARK_DEMO_GCS=${SPARK_DEMO_GCS:=gs://boo-spark-kf"
  },
  {
    "path": "data-extraction/spark-hello-world/spark-pi-min.yaml",
    "chars": 849,
    "preview": "apiVersion: \"sparkoperator.k8s.io/v1beta2\"\nkind: SparkApplication\nmetadata:\n  name: spark-pi\n  namespace: kubeflow\nspec:"
  },
  {
    "path": "data-extraction/spark-hello-world/spark-pi.yaml",
    "chars": 895,
    "preview": "apiVersion: \"sparkoperator.k8s.io/v1beta2\"\nkind: SparkApplication\nmetadata:\n  name: spark-pi\n  namespace: kubeflow\nspec:"
  },
  {
    "path": "data-extraction/stack_overflow_questions.bsql",
    "chars": 7,
    "preview": "SELECT "
  },
  {
    "path": "data-extraction/tfx/TFDV.ipynb",
    "chars": 8587,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"We start by downloading a specific "
  },
  {
    "path": "data-extraction/tfx/TFDV.py",
    "chars": 5225,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# We start by downloading a specific release of the components because running fr"
  },
  {
    "path": "data-extraction/tfx/install_tfx.sh",
    "chars": 88,
    "preview": "#!/bin/bash\n#tag::install[]\npip3 install tfx tensorflow-data-validation\n#end::install[]\n"
  },
  {
    "path": "data-extraction/tfx/requirements.txt",
    "chars": 4,
    "preview": "tfx\n"
  },
  {
    "path": "data-extraction/tfx/run_on_dataflow_ex.py",
    "chars": 501,
    "preview": "#tag::example[]\ngenerated_output_uri = root_output_uri + kfp.dsl.EXECUTION_ID_PLACEHOLDER\nbeam_pipeline_args = [\n    '--"
  },
  {
    "path": "dev-setup/install-argo.sh",
    "chars": 227,
    "preview": "#!/bin/bash\n# Download the binary\ncurl -sLO https://github.com/argoproj/argo/releases/download/v2.8.1/argo-linux-amd64\n\n"
  },
  {
    "path": "dev-setup/install-kf-pipeline-sdk.sh",
    "chars": 471,
    "preview": "#!/bin/bash\n# Put as inside a venv\npushd /tmp\n#tag::venv[]\nvirtualenv kfvenv --python python3\nsource kfvenv/bin/activate"
  },
  {
    "path": "dev-setup/install-kf.sh",
    "chars": 724,
    "preview": "#!/bin/bash\nset -ex\n#tag::install[]\nPLATFORM=$(uname) # Either Linux or Darwin\nexport PLATFORM\nmkdir -p ~/bin\n#Configura"
  },
  {
    "path": "dev-setup/install-kubectl.sh",
    "chars": 1250,
    "preview": "#!/bin/bash\n#tag::ubuntu-kubectl[]\nsudo snap install kubectl --classic\n#end::ubuntu-kubectl[]\n#tag::debian-kubectl[]\nsud"
  },
  {
    "path": "dev-setup/install-kustomize.sh",
    "chars": 576,
    "preview": "#!/bin/bash\n#tag::kustomize[]\nPLATFORM=$(uname) # Either Linux or Darwin\nexport PLATFORM\nmkdir -p ~/bin\nKUSTOMIZE_URL=$("
  },
  {
    "path": "dev-setup/install-microk8s.sh",
    "chars": 934,
    "preview": "#!/bin/bash\n#tag::installmicrok8s[]\nsudo snap install microk8s --classic\n#end::installmicrok8s[]\n#tag::setupmicrok8s[]\n#"
  },
  {
    "path": "dev-setup/jsonnet.sh",
    "chars": 397,
    "preview": "#!/bin/bash\nset -e\nset -x\n#tag::snap[]\nsudo snap install jsonnet\n#end::snap[]\n#tag::manual[]\nexport JSONNET_VERSION=0.12"
  },
  {
    "path": "feature-prep/README.md",
    "chars": 381,
    "preview": "Feature preparation is the task of converting the data into features\nsuitable for our machine algorithms. What makes a \""
  },
  {
    "path": "feature-prep/spark/SparkMailingListFeaturePrep.ipynb",
    "chars": 14207,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": "
  },
  {
    "path": "feature-prep/spark/SparkMailingListFeaturePrep.py",
    "chars": 9754,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# In[ ]:\n\n# Yes we need both these imports\nfrom pyspark.sql import SparkSession\nf"
  },
  {
    "path": "feature-prep/tft/requirements.txt",
    "chars": 27,
    "preview": "tfx\ntensorflow\napache-beam\n"
  },
  {
    "path": "feature-prep/tft/transform.py",
    "chars": 481,
    "preview": "#tag::imports[]\nimport tensorflow as tf\nimport tensorflow_transform as tft\nfrom tensorflow_transform.tf_metadata import "
  },
  {
    "path": "gcp-setup/cloudshell_scrip.sh",
    "chars": 342,
    "preview": "#!/bin/bash\n# Note: this only works inside of cloudshell!\n#tag::cloudshell_script[]\nG_SOURCES=\"https://source.developers"
  },
  {
    "path": "gcp-setup/setup-gcp.sh",
    "chars": 1176,
    "preview": "#!/bin/bash\n#tag::ubuntu[]\napt-get install google-cloud-sdk\n#end::ubuntu[]\napt-get remove google-cloud-sdk\n#tag::general"
  },
  {
    "path": "kfctl_gcp_iap.v1.0.1.yaml",
    "chars": 9886,
    "preview": "apiVersion: kfdef.apps.kubeflow.org/v1\nkind: KfDef\nmetadata:\n  namespace: kubeflow\nspec:\n  applications:\n  - kustomizeCo"
  },
  {
    "path": "pipelines/ControlStructures.ipynb",
    "chars": 11142,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Simple Control structure\\n\",\n    "
  },
  {
    "path": "pipelines/Lightweight Pipeline.ipynb",
    "chars": 13249,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Setup\"\n   ]\n  },\n  {\n   \"cell_typ"
  },
  {
    "path": "pipelines/RecommenderPipeline.ipynb",
    "chars": 16063,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Kubeflow pipeline\\n\",\n    \"This i"
  },
  {
    "path": "pipelines/download_components.sh",
    "chars": 150,
    "preview": "#!/bin/bash\n#tag::dlPipelineRelease[]\nwget https://github.com/kubeflow/pipelines/archive/0.2.5.tar.gz\ntar -xvf 0.2.5.tar"
  },
  {
    "path": "recommender/Dockerfile",
    "chars": 311,
    "preview": "FROM  tensorflow/tensorflow:1.12.0-devel-py3\nRUN pip3 install --upgrade pip\nRUN pip3 install pandas --upgrade\nRUN pip3 i"
  },
  {
    "path": "recommender/Recommender_Kubeflow.ipynb",
    "chars": 62264,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# This is implementation of the Rec"
  },
  {
    "path": "recommender/Recommender_Kubeflow.py",
    "chars": 12874,
    "preview": "#!/usr/bin/env python\n# coding: utf-8\n\n# # This is implementation of the Recommender training\n#\n# This implementation ta"
  },
  {
    "path": "recommender/docker/Dockerfile",
    "chars": 375,
    "preview": "FROM  tensorflow/tensorflow:1.15.0-py3\nRUN pip3 install --upgrade pip\nRUN pip3 install pandas --upgrade\nRUN pip3 install"
  },
  {
    "path": "recommender/docker/build.sh",
    "chars": 86,
    "preview": "#!/bin/bash\n\nimg='lightbend/ml-tf-recommender'\ntag='0.1'\ndocker build -t $img:$tag .\n\n"
  },
  {
    "path": "recommender/tfservingchart/.helmignore",
    "chars": 333,
    "preview": "# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation"
  },
  {
    "path": "recommender/tfservingchart/Chart.yaml",
    "chars": 151,
    "preview": "apiVersion: v1\nappVersion: 1.14.0\ndescription: TF Serving\nmaintainers:\n- name: Boris Lublinsky\nname: TF Serving Recommen"
  },
  {
    "path": "recommender/tfservingchart/templates/NOTES.txt",
    "chars": 59,
    "preview": "Kubeflow Model serving components : tfserving is installed\n"
  },
  {
    "path": "recommender/tfservingchart/templates/_helpers.tpl",
    "chars": 550,
    "preview": "{{/* vim: set filetype=mustache: */}}\n{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"modelserverchart.name\" -}}\n{{-"
  },
  {
    "path": "recommender/tfservingchart/templates/minioaccess.yaml",
    "chars": 153,
    "preview": "apiVersion: v1\nkind: Secret\nmetadata:\n  name: minioaccess\n  namespace: kubeflow\ndata:\n  AWS_ACCESS_KEY_ID: bWluaW8=\n  AW"
  },
  {
    "path": "recommender/tfservingchart/templates/tfserving.yaml",
    "chars": 2364,
    "preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  namespace: kubeflow\n  name: recommendermodelserver\n  labels:\n    app: r"
  },
  {
    "path": "recommender/tfservingchart/templates/tfserving1.yaml",
    "chars": 2372,
    "preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  namespace: kubeflow\n  name: recommendermodelserver1\n  labels:\n    app: "
  },
  {
    "path": "recommender/tfservingchart/values.yaml",
    "chars": 125,
    "preview": "# application name is a namespace\n# docker images\nimage:\n  server: tensorflow/serving\n  pullPolicy: Always\n  version: 1."
  },
  {
    "path": "runthrough.sh",
    "chars": 645,
    "preview": "#!/bin/bash\nset -ex\nexample_repo_home=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" >/dev/null 2>&1 && pwd )\"\nKF_PLATFORM=${"
  },
  {
    "path": "scikitLearn/python/IncomePrediction.ipynb",
    "chars": 32527,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Income prediction\\n\",\n    \"based "
  }
]

About this extraction

This page contains the full source code of the intro-to-ml-with-kubeflow/intro-to-ml-with-kubeflow-examples GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 147 files (501.5 KB), approximately 155.7k tokens, and a symbol index of 60 functions, classes, methods, constants, and types. Use it with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input; you can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!