Showing preview only (5,912K chars total). Download the full file or copy to clipboard to get everything.
Repository: amygdala/code-snippets
Branch: master
Commit: 9fa00e5a4ec4
Files: 232
Total size: 5.6 MB
Directory structure:
gitextract_gz1rnp0c/
├── LICENSE
├── README.md
├── cloud_run/
│ └── twilio_vision/
│ ├── Dockerfile
│ ├── README.md
│ └── src/
│ ├── requirements.txt
│ └── whats_that.py
├── datalab/
│ └── facets/
│ ├── README.md
│ └── facets_snippets.ipynb
└── ml/
├── README.md
├── automl/
│ └── tables/
│ ├── kfp_e2e/
│ │ ├── README.md
│ │ ├── create_dataset_for_tables/
│ │ │ ├── tables_component.py
│ │ │ └── tables_component.yaml
│ │ ├── create_model_for_tables/
│ │ │ ├── tables_component.py
│ │ │ ├── tables_component.yaml
│ │ │ ├── tables_eval_component.py
│ │ │ ├── tables_eval_component.yaml
│ │ │ ├── tables_eval_metrics_component.py
│ │ │ └── tables_eval_metrics_component.yaml
│ │ ├── deploy_model_for_tables/
│ │ │ ├── convert_oss.py
│ │ │ ├── exported_model_deploy.py
│ │ │ ├── instances.json
│ │ │ ├── model_serve_template.yaml
│ │ │ ├── tables_deploy_component.py
│ │ │ └── tables_deploy_component.yaml
│ │ ├── import_data_from_bigquery/
│ │ │ ├── tables_component.py
│ │ │ ├── tables_component.yaml
│ │ │ ├── tables_schema_component.py
│ │ │ └── tables_schema_component.yaml
│ │ ├── tables_containers/
│ │ │ └── model-service-launcher/
│ │ │ ├── Dockerfile
│ │ │ └── build.sh
│ │ ├── tables_pipeline_caip.py
│ │ └── tables_pipeline_kf.py
│ ├── model_export/
│ │ ├── Dockerfile.template
│ │ ├── automl_tables_model_export_cloud_run.md
│ │ ├── convert_oss.py
│ │ └── instances.json
│ └── xai/
│ ├── README.md
│ ├── automl_tables_xai.ipynb
│ └── bigquery_examples.md
├── census_train_and_eval/
│ ├── README.md
│ ├── config_custom_gpus.yaml
│ ├── hptuning_config.yaml
│ ├── test.json
│ ├── trainer/
│ │ ├── __init__.py
│ │ ├── model.py
│ │ └── task.py
│ └── using_tf.estimator.train_and_evaluate.ipynb
├── kubeflow-pipelines/
│ ├── README.md
│ ├── README_github_summ.md
│ ├── README_taxidata_examples.md
│ ├── components/
│ │ ├── README.md
│ │ ├── automl/
│ │ │ ├── container/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── dataset_train/
│ │ │ └── dataset_model.py
│ │ ├── cmle/
│ │ │ ├── containers/
│ │ │ │ ├── base/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── cmle_deploy/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── deploy/
│ │ │ └── deploy_model.py
│ │ └── older/
│ │ ├── dataflow/
│ │ │ ├── containers/
│ │ │ │ ├── base/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ ├── tfma/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── tft/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── taxi_schema/
│ │ │ │ └── taxi_schema/
│ │ │ │ ├── __init__.py
│ │ │ │ └── taxi_schema.py
│ │ │ ├── tfma/
│ │ │ │ ├── analysis/
│ │ │ │ │ └── setup.py
│ │ │ │ ├── model_analysis-taxi.py
│ │ │ │ └── tfma_expers.ipynb
│ │ │ └── tft/
│ │ │ ├── mcsv_coder.py
│ │ │ ├── preprocessing.py
│ │ │ ├── preprocessing2.py
│ │ │ ├── schema.pbtxt
│ │ │ ├── taxi_preprocess_bq.py
│ │ │ └── transform/
│ │ │ └── setup.py
│ │ ├── kubeflow/
│ │ │ ├── containers/
│ │ │ │ ├── launcher/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ ├── tf-serving/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ ├── tf-serving-gh/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── trainer/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── launcher/
│ │ │ │ ├── train.py
│ │ │ │ └── train.template.yaml
│ │ │ ├── taxi_model/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data/
│ │ │ │ │ ├── eval/
│ │ │ │ │ │ └── data.csv
│ │ │ │ │ └── train/
│ │ │ │ │ └── data.csv
│ │ │ │ ├── schema.pbtxt
│ │ │ │ ├── setup.py
│ │ │ │ └── trainer/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── model.py
│ │ │ │ ├── task.py
│ │ │ │ └── taxi.py
│ │ │ ├── tf-serving/
│ │ │ │ ├── chicago_taxi_client.py
│ │ │ │ ├── deploy-tf-serve.py
│ │ │ │ ├── schema.pbtxt
│ │ │ │ └── tf-serve-template.yaml
│ │ │ └── tf-serving-gh/
│ │ │ ├── deploy-tf-serve.py
│ │ │ └── tf-serve-template.yaml
│ │ └── t2t/
│ │ ├── containers/
│ │ │ ├── base/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── t2t_app/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── t2t_proc/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── t2t_train/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── webapp-launcher/
│ │ │ ├── Dockerfile
│ │ │ └── build.sh
│ │ ├── t2t-app/
│ │ │ └── app/
│ │ │ ├── ghsumm/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── setup.py
│ │ │ │ └── trainer/
│ │ │ │ ├── __init__.py
│ │ │ │ └── problem.py
│ │ │ ├── github_issues_sample.csv
│ │ │ ├── main.py
│ │ │ └── templates/
│ │ │ └── index.html
│ │ ├── t2t-proc/
│ │ │ └── ghsumm/
│ │ │ ├── __init__.py
│ │ │ ├── setup.py
│ │ │ └── trainer/
│ │ │ ├── __init__.py
│ │ │ └── problem.py
│ │ ├── t2t-train/
│ │ │ ├── ghsumm/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── setup.py
│ │ │ │ └── trainer/
│ │ │ │ ├── __init__.py
│ │ │ │ └── problem.py
│ │ │ └── train_model.py
│ │ └── webapp-launcher/
│ │ ├── deploy-webapp.py
│ │ └── t2tapp-template.yaml
│ ├── keras_tuner/
│ │ ├── README.md
│ │ ├── components/
│ │ │ ├── eval_metrics_component.yaml
│ │ │ ├── kubeflow-resources/
│ │ │ │ ├── bikesw_training/
│ │ │ │ │ ├── bikes_weather_limited.py
│ │ │ │ │ ├── bw_hptune_standalone.py
│ │ │ │ │ ├── bwmodel/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── model.py
│ │ │ │ │ ├── deploy_tuner.py
│ │ │ │ │ ├── eval_metrics.py
│ │ │ │ │ ├── kchief_deployment_templ.yaml
│ │ │ │ │ └── ktuners_deployment_templ.yaml
│ │ │ │ ├── cloudbuild.yaml
│ │ │ │ ├── containers/
│ │ │ │ │ ├── bikesw_training/
│ │ │ │ │ │ ├── Dockerfile
│ │ │ │ │ │ ├── build.sh
│ │ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ │ └── copydir.sh
│ │ │ │ │ ├── bikesw_training_hptune/
│ │ │ │ │ │ ├── Dockerfile
│ │ │ │ │ │ ├── build.sh
│ │ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ │ └── copydir.sh
│ │ │ │ │ ├── deploy_jobs/
│ │ │ │ │ │ ├── Dockerfile
│ │ │ │ │ │ ├── build.sh
│ │ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ │ └── copydir.sh
│ │ │ │ │ └── tf-serving/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ ├── build.sh
│ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ └── copydir.sh
│ │ │ │ └── tf-serving/
│ │ │ │ ├── deploy-tfserve.py
│ │ │ │ └── tf-serve-template.yaml
│ │ │ ├── serve_component.yaml
│ │ │ ├── tfdv/
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── tfdv.py
│ │ │ │ └── tfdv_compare.py
│ │ │ ├── tfdv_component.yaml
│ │ │ ├── tfdv_drift_component.yaml
│ │ │ └── train_component.yaml
│ │ ├── example_pipelines/
│ │ │ ├── bw_ktune.py
│ │ │ ├── bw_ktune_metrics.py
│ │ │ ├── bw_tfdv.py
│ │ │ ├── bw_train.py
│ │ │ └── bw_train_metrics.py
│ │ └── notebooks/
│ │ └── metrics_eval_component.ipynb
│ ├── samples/
│ │ ├── automl/
│ │ │ ├── README.md
│ │ │ └── dataset_and_train.py
│ │ └── kubeflow-tf/
│ │ ├── README.md
│ │ └── older/
│ │ ├── README.md
│ │ ├── gh_summ.py
│ │ ├── gh_summ_serve.py
│ │ ├── pipelines-kubecon.ipynb
│ │ ├── workflow1.py
│ │ └── workflow2.py
│ └── sbtb/
│ ├── README.md
│ ├── components/
│ │ ├── kubeflow-resources/
│ │ │ ├── bikesw_training/
│ │ │ │ └── bikes_weather.py
│ │ │ ├── containers/
│ │ │ │ ├── bikesw_training/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── tf-serving/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── tf-serving/
│ │ │ ├── deploy-tfserve.py
│ │ │ └── tf-serve-template.yaml
│ │ ├── serve_component.yaml
│ │ └── train_component.yaml
│ └── example_pipelines/
│ └── bw.py
├── notebook_examples/
│ ├── TF_linear_regressor.ipynb
│ ├── caipp/
│ │ ├── caipp_connect.ipynb
│ │ └── kfp_in_a_notebook.ipynb
│ ├── functions/
│ │ ├── hosted_kfp_gcf.ipynb
│ │ ├── main.py
│ │ └── requirements.txt
│ ├── hosted_kfp/
│ │ └── event_triggered_kfp_pipeline_bw.ipynb
│ ├── keras_linear_regressor.ipynb
│ └── mnist_estimator.ipynb
└── vertex_pipelines/
└── pytorch/
└── cifar/
├── Dockerfile
├── Dockerfile-gpu
├── Dockerfile-gpu-ct
├── LICENSE
├── README.md
├── input.json
├── pytorch-pipeline/
│ ├── .gitignore
│ ├── README.md
│ ├── cifar10_datamodule.py
│ ├── cifar10_pytorch.py
│ ├── cifar10_train.py
│ ├── process_test.py
│ ├── pytorch_pipeline/
│ │ ├── Dockerfile
│ │ ├── __init__.py
│ │ ├── components/
│ │ │ ├── base/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_component.py
│ │ │ │ └── base_executor.py
│ │ │ └── trainer/
│ │ │ ├── __init__.py
│ │ │ ├── component.py
│ │ │ ├── executor.py
│ │ │ └── generic_executor.py
│ │ └── examples/
│ │ ├── __init__.py
│ │ └── cifar10/
│ │ ├── cifar10_datamodule.py
│ │ ├── cifar10_pre_process.py
│ │ ├── cifar10_pytorch.py
│ │ ├── cifar10_train.py
│ │ ├── input.json
│ │ └── utils.py
│ └── training_task.py
├── pytorch_cifar10_vertex_pipelines.ipynb
└── requirements.txt
================================================
FILE CONTENTS
================================================
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
This is a repo for small Google Cloud Platform (GCP) snippets and examples used in blog posts etc.
Contributions are not currently accepted. This is not an official Google product.
================================================
FILE: cloud_run/twilio_vision/Dockerfile
================================================
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# For more information about this base image and dockerfile, see
# https://github.com/GoogleCloudPlatform/python-docker
FROM python:3.7
ENV APP_HOME=/app
WORKDIR $APP_HOME
# Copy the application source (including requirements.txt) into the image.
# COPY is used rather than ADD because no archive extraction or URL fetch is needed.
COPY src $APP_HOME
RUN pip install --no-cache-dir -r $APP_HOME/requirements.txt
# Shell form is kept so that $PORT is expanded at container start; `exec`
# replaces the shell so gunicorn receives container signals directly.
CMD exec gunicorn -w 4 -b :$PORT whats_that:app
================================================
FILE: cloud_run/twilio_vision/README.md
================================================
# Cloud Run example: 'Twilio-vision'
This directory contains a simple [Cloud Run](https://cloud.google.com/run/docs/) example that lets you text images to a [Twilio](https://www.twilio.com) phone number (once set up), and get back information about how the
[Cloud Vision API](https://cloud.google.com/vision/docs/) labeled the image.
<a href="https://storage.googleapis.com/amy-jo/images/doofball_doghouse.jpg" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/doofball_doghouse.jpg" width=300/></a>
## Prerequisites
1. Create a project in the [Google Cloud Platform Console](https://console.cloud.google.com).
2. Enable the Cloud Vision API. See the
["Getting Started"](https://cloud.google.com/vision/docs/quickstart) page
for more information.
3. Install the [Google Cloud SDK](https://cloud.google.com/sdk):
$ curl https://sdk.cloud.google.com | bash
$ gcloud init
Alternately, you can use the [Cloud Shell](https://cloud.google.com/shell/docs/), where `gcloud` is already installed.
4. Optional: Install and start up [Docker](https://www.docker.com/).
This is only necessary if you want to build your container image locally.
You can run the example without doing this.
## Build and push the Docker image for your Cloud Run app
You can do this via either a local Docker installation, or via [Cloud Build](https://cloud.google.com/cloud-build/).
### Using a local Docker install
(If you're using the Cloud Shell, no installation or config should be necessary).
Install Docker, then run the following `gcloud` command to add the gcloud Docker credential helper:
```sh
gcloud auth configure-docker
```
Then, run the following from this directory (the one that contains the `Dockerfile`) to build and push your container. Replace `<your_project>` with your GCP project ID.
```sh
docker build -t gcr.io/<your_project>/twilio-vision:v1 .
docker push gcr.io/<your_project>/twilio-vision:v1
```
### Using Cloud Build
Alternately, build your container image using Cloud Build. Run the following from this directory, again editing to use your project ID:
```sh
gcloud builds submit --tag gcr.io/<your_project>/twilio-vision:v1
```
## Deploy the Cloud Run app
Once you've built the container image, deploy the app as follows (edit to use your project id). Replace the
`MESSAGE_BLURB` string with whatever you prefer — this string will be returned as part of the app response, along with information from the Cloud Vision API.
```sh
gcloud beta run deploy \
--image gcr.io/<your_project>/twilio-vision:v1 \
--region us-central1 \
--allow-unauthenticated \
--set-env-vars MESSAGE_BLURB="Courtesy of the Google Cloud Vision API..."
```
Make note of the endpoint the app is running on. You'll need it for the next step.
## Set up a Twilio number
Create and set up a [Twilio account](https://www.twilio.com/try-twilio) and number capable of sending and receiving MMS.
### Create a Twilio "TwiML app" using your new Cloud Run service's URL
Visit [this page](https://www.twilio.com/console/sms/runtime/twiml-apps) on the Twilio
site, and create a "TwiML" app that points to the Cloud Run endpoint from the previous step.
Then, configure your Twilio phone number to use that TwiML app for Messaging.
## Test your app!
Text an image to the Twilio number. You should receive a response with some information about the image from the Cloud Vision API.
<a href="https://storage.googleapis.com/amy-jo/images/doofball_doghouse.jpg" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/doofball_doghouse.jpg" width=300/></a>
================================================
FILE: cloud_run/twilio_vision/src/requirements.txt
================================================
google-api-python-client
oauth2client
gunicorn
flask
twilio
requests
================================================
FILE: cloud_run/twilio_vision/src/whats_that.py
================================================
#!/usr/bin/env python
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import os
import logging
from flask import Flask, request
from googleapiclient import discovery
from googleapiclient import errors
from oauth2client.client import GoogleCredentials
import requests
import twilio.twiml
from twilio.twiml.messaging_response import MessagingResponse
# Emit INFO-level (and above) records from the root logger.
logging.getLogger().setLevel(logging.INFO)
# URL template handed to discovery.build() as discoveryServiceUrl in get_labels.
DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}' # noqa
# Attachment content types that receive_message forwards for labeling.
ACCEPTABLE_FILE_TYPES = ["image/jpeg", "image/png", "image/jpg"]
# Reply preamble used when the MESSAGE_BLURB environment variable is not set.
DEFAULT_PRETEXT = ("Your message has been passed to the Google Cloud "
"Vision API for processing.\n Images are not stored in "
"Google Cloud Platform, and will periodically need to be "
"deleted from Twilio.")
# Text that construct_message places at the start of every labeling reply.
PRETEXT = os.environ.get('MESSAGE_BLURB', DEFAULT_PRETEXT)
app = Flask(__name__)
# NOTE(review): debug mode is switched on unconditionally at import time —
# confirm this is intended for deployed instances.
app.debug = True
@app.route("/", methods=['GET', 'POST'])
def receive_message():
    """Label the first usable image attached to an incoming Twilio message.

    Reads the ``NumMedia``, ``MediaContentType<i>`` and ``MediaUrl<i>``
    request values, downloads the first attachment whose content type is
    listed in ``ACCEPTABLE_FILE_TYPES`` and passes its bytes to
    ``get_labels``.

    Returns:
        The ``MessagingResponse`` rendered with ``str()``. It carries either
        the text built by ``construct_message``, "No media attachments
        found." when ``NumMedia`` is zero or absent, or "We are not able to
        process this type of attachment." when no attachment is usable.
    """
    resp = MessagingResponse()

    attachments = int(request.values.get('NumMedia', 0))
    if not attachments:
        resp.message("No media attachments found.")
        return str(resp)

    # Walk the attachments until one is usable. Unsupported ones are skipped
    # rather than rejecting the whole message on the first mismatch.
    for i in range(attachments):
        media_content_type = request.values.get('MediaContentType%i' % i, None)
        media_url = request.values.get('MediaUrl%i' % i, None)

        if media_content_type not in ACCEPTABLE_FILE_TYPES or not media_url:
            continue

        # Bound the download so a stalled media host cannot hang the worker.
        image = requests.get(media_url, timeout=10).content

        # Query the API
        labels, face_annotations, logos = get_labels(image)
        logging.info("got labels %s", str(labels))

        # Only the first usable image is analysed.
        break
    else:
        # The loop finished without a break: nothing could be processed.
        resp.message("We are not able to process this type of attachment.")
        return str(resp)

    # Construct the response
    resp = construct_message(labels, face_annotations, logos)
    return str(resp)
def construct_message(labels, face_annotations, logos):
    """Render the annotation results as a Twilio messaging response.

    Args:
        labels: sequence of label entries, each indexed by 'score' and
            'description'.
        face_annotations: face entries, handed unchanged to
            ``extract_sentiment``.
        logos: sequence of logo entries, each indexed by 'score' and
            'description'.

    Returns:
        A ``MessagingResponse`` whose single message is ``PRETEXT`` followed
        by one line per label, logo and non-zero emotion count, or by
        " No labels found." when there is nothing to report.
    """
    likelihood_words = ['very likely', 'likely', 'possibly']

    # One line per label, then one per logo.
    lines = ['\nScore is %s for %s' % (entry['score'], entry['description'])
             for entry in labels]
    lines.extend(
        '\nLogo score is %s for %s' % (entry['score'], entry['description'])
        for entry in logos)

    joy, anger, sorrow, surprise = extract_sentiment(face_annotations)
    # Emotion counts paired with their sentence template, in reporting order.
    emotion_templates = (
        (joy, '\nWe found %s people who are '
              '%s experiencing joy'),
        (anger, '\nWe found %s people who are '
                '%s experiencing anger'),
        (sorrow, '\nWe found %s people who are '
                 '%s experiencing sorrow'),
        (surprise, '\nWe found %s people who are '
                   '%s experiencing surprise'),
    )
    for level, wording in enumerate(likelihood_words):
        for counts, template in emotion_templates:
            if counts[level] > 0:
                lines.append(template % (counts[level], wording))

    summary = ''.join(lines)
    if not summary:
        summary = " No labels found."

    reply = MessagingResponse()
    reply.message(PRETEXT + summary)
    return reply
def extract_sentiment(emotions):
    """Count faces per emotion at each positive likelihood level.

    Args:
        emotions: iterable of face-annotation dicts, each optionally holding
            'joyLikelihood', 'angerLikelihood', 'sorrowLikelihood' and
            'surpriseLikelihood'. An empty value (``[]``, ``""`` or ``None``)
            yields all-zero counts, and a missing field simply does not
            count towards any level instead of raising ``KeyError``.

    Returns:
        A tuple ``(joy, anger, sorrow, surprise)``. Each element is a list of
        three ints: the number of faces whose likelihood for that emotion is
        'VERY_LIKELY', 'LIKELY' and 'POSSIBLE', in that order.
    """
    odds = ('VERY_LIKELY', 'LIKELY', 'POSSIBLE')
    fields = ('joyLikelihood', 'angerLikelihood',
              'sorrowLikelihood', 'surpriseLikelihood')

    # Materialise once so one-shot iterables can be scanned per field/level.
    faces = list(emotions or ())

    joy, anger, sorrow, surprise = (
        [sum(face.get(field) == level for face in faces) for level in odds]
        for field in fields
    )
    return joy, anger, sorrow, surprise
def get_labels(image, num_retries=3, max_labels=3, max_faces=10, max_logos=1):
    """Run label, face and logo detection on an image via the Vision API.

    Args:
        image: raw image bytes; they are base64-encoded before being sent.
        num_retries: passed through to the request's execute() call.
        max_labels: 'maxResults' for the LABEL_DETECTION feature.
        max_faces: 'maxResults' for the FACE_DETECTION feature.
        max_logos: 'maxResults' for the LOGO_DETECTION feature.

    Returns:
        A (labels, face_annotations, logos) tuple taken from the first entry
        of the API response. Any element that is missing from the response,
        or all of them if the request fails, is the empty string "".
    """
    # Empty-string defaults are falsy and iterate as empty, so callers can
    # loop over the results even when nothing was detected.
    labels = ""
    face_annotations = ""
    logos = ""

    # Set up the service that can access the API
    credentials = GoogleCredentials.get_application_default()
    service = discovery.build('vision', 'v1', credentials=credentials,
                              discoveryServiceUrl=DISCOVERY_URL)

    # Prepare the image for the API
    image_content = base64.b64encode(image).decode('UTF-8')

    # Construct the request
    features = [
        {'type': 'LABEL_DETECTION', 'maxResults': max_labels},
        {'type': 'FACE_DETECTION', 'maxResults': max_faces},
        {'type': 'LOGO_DETECTION', 'maxResults': max_logos},
    ]
    service_request = service.images().annotate(
        body={
            'requests': [{
                'image': {'content': image_content},
                'features': features,
            }]
        })

    # Send it off to the API. This is best-effort: failures are logged and
    # whatever was collected so far (possibly nothing) is returned.
    try:
        response = service_request.execute(num_retries=num_retries)
        logging.info("got response: %s", str(response))
        if 'responses' in response:
            # Only one image was submitted, so only the first entry matters.
            first = response['responses'][0]
            labels = first.get('labelAnnotations', labels)
            logos = first.get('logoAnnotations', logos)
            face_annotations = first.get('faceAnnotations', face_annotations)
    except KeyError as e2:
        # logging.warn is a deprecated alias (removed in Python 3.13).
        logging.warning("Key error: %s", e2)
    except Exception as e:
        logging.warning(e)

    return labels, face_annotations, logos
if __name__ == "__main__":
    # Listen on all interfaces, on the port given by the PORT environment
    # variable (8080 when it is unset).
    # NOTE(review): `app` looks like a Flask app - confirm. Debug mode on a
    # server bound to 0.0.0.0 exposes the interactive debugger to any client,
    # so it is now off unless FLASK_DEBUG is explicitly set for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(debug=debug_enabled, host='0.0.0.0',
            port=int(os.environ.get('PORT', 8080)))
================================================
FILE: datalab/facets/README.md
================================================
[To be added.]
================================================
FILE: datalab/facets/facets_snippets.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"Copyright 2017 Google Inc. All rights reserved.\n",
"Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"you may not use this file except in compliance with the License.\n",
"You may obtain a copy of the License at\n",
" http://www.apache.org/licenses/LICENSE-2.0\n",
"Unless required by applicable law or agreed to in writing, software\n",
"distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"See the License for the specific language governing permissions and\n",
"limitations under the License.\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import google.datalab.bigquery as bq\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%bq query -n requests\n",
"SELECT *\n",
"FROM `bigquery-public-data.nhtsa_traffic_fatalities.accident_2015` \n",
"LIMIT 10000"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = requests.execute(output_options=bq.QueryOutput.dataframe()).result()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from google.datalab.ml import FacetsOverview\n",
"\n",
"FacetsOverview().plot({'data': df})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"...."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from google.datalab.ml import FacetsDiveview\n",
"\n",
"FacetsDiveview().plot(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: ml/README.md
================================================
This directory contains various ML-related examples.
(The Cloud Shell tutorials have moved [here](https://github.com/GoogleCloudPlatform/cloud-shell-tutorials/tree/master/ml)).
================================================
FILE: ml/automl/tables/kfp_e2e/README.md
================================================
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
# AutoML Tables: end-to-end workflow on Cloud AI Platform Pipelines
- [Introduction](#introduction)
- [About the example dataset and scenario](#about-the-example-dataset-and-scenario)
- [Using Cloud AI Platform Pipelines or Kubeflow Pipelines to orchestrate a Tables workflow](#using-cloud-ai-platform-pipelines-or-kubeflow-pipelines-to-orchestrate-a-tables-workflow)
- [Install a Cloud AI Platform Pipelines cluster](#install-a-cloud-ai-platform-pipelines-cluster)
- [Or, install Kubeflow to use Kubeflow Pipelines](#or-install-kubeflow-to-use-kubeflow-pipelines)
- [Upload and run the Tables end-to-end Pipeline](#upload-and-run-the-tables-end-to-end-pipeline)
- [The steps executed by the pipeline](#the-steps-executed-by-the-pipeline)
- [Create a Tables dataset and adjust its schema](#create-a-tables-dataset-and-adjust-its-schema)
- [Train a custom model on the dataset](#train-a-custom-model-on-the-dataset)
- [View model search information via Cloud Logging](#view-model-search-information-via-cloud-logging)
- [Custom model evaluation](#custom-model-evaluation)
- [(Conditional) model deployment](#conditional-model-deployment)
- [Putting it together: The full pipeline execution](#putting-it-together-the-full-pipeline-execution)
- [Getting explanations about your model’s predictions](#getting-explanations-about-your-models-predictions)
- [The AutoML Tables UI in the Cloud Console](#the-automl-tables-ui-in-the-cloud-console)
- [Export the trained model and serve it on a GKE cluster](#export-the-trained-model-and-serve-it-on-a-gke-cluster)
- [Send prediction requests to your deployed model service](#send-prediction-requests-to-your-deployed-model-service)
- [A deeper dive into the pipeline code](#a-deeper-dive-into-the-pipeline-code)
- [Using the ‘lightweight python components’ functionality to build pipeline steps](#using-the-lightweight-python-components--functionality-to-build-pipeline-steps)
- [Specifying the Tables pipeline](#specifying-the-tables-pipeline)
## Introduction
[AutoML Tables][1] lets you automatically build, analyze, and deploy state-of-the-art machine learning models using your own structured data.
A number of new AutoML Tables features have been released recently. These include:
- An improved [Python client library][2]
- The ability to obtain [explanations][3] for your online predictions
- The ability to [export your model and serve it in a container][4] anywhere
- The ability to view model search progress and final model hyperparameters [in Cloud Logging][5]
This tutorial gives a tour of some of these new features via a [Cloud AI Platform Pipelines][6] example, that shows end-to-end management of an AutoML Tables workflow.
The example pipeline [creates a _dataset_][7], [imports][8] data into the dataset from a [BigQuery][9] _view_, and [trains][10] a custom model on that data. Then, it fetches [evaluation and metrics][11] information about the trained model, and based on specified criteria about model quality, uses that information to automatically determine whether to [deploy][12] the model for online prediction. Once the model is deployed, you can make prediction requests, and optionally obtain prediction [explanations][13] as well as the prediction result.
In addition, the example shows how to scalably **_serve_** your exported trained model from your Cloud AI Platform Pipelines installation for prediction requests.
You can manage all the parts of this workflow from the [Tables UI][14] as well, or programmatically via a [notebook][15] or script. But specifying this process as a workflow has some advantages: the workflow becomes reliable and repeatable, and Pipelines makes it easy to monitor the results and schedule recurring runs.
For example, if your dataset is updated regularly—say once a day— you could schedule a workflow to run daily, each day building a model that trains on an updated dataset.
(With a bit more work, you could also set up event-based triggering pipeline runs, for example [when new data is added][16] to a [Google Cloud Storage][17] bucket.)
### About the example dataset and scenario
The [Cloud Public Datasets Program][18] makes available public datasets that are useful for experimenting with machine learning. For our examples, we’ll use data that is essentially a join of two public datasets stored in [BigQuery][19]: [London Bike rentals][20] and [NOAA weather data][21], with some additional processing to clean up outliers and derive additional GIS and day-of-week fields. Using this dataset, we’ll build a regression model to predict the _duration_ of a bike rental based on information about the start and end rental stations, the day of the week, the weather on that day, and other data. If we were running a bike rental company, we could use these predictions—and their [explanations][22]—to help us anticipate demand and even plan how to stock each location.
While we’re using bike and weather data here, you can use AutoML Tables for tasks as varied as asset valuations, fraud detection, credit risk analysis, customer retention prediction, analyzing item layouts in stores, and many more.
## Using Cloud AI Platform Pipelines or Kubeflow Pipelines to orchestrate a Tables workflow
You can run this example via a [Cloud AI Platform Pipelines][23] installation, or via [Kubeflow Pipelines][24] on a [Kubeflow on GKE][25] installation. [Cloud AI Platform Pipelines][26] was recently launched in Beta. Slightly different variants of the pipeline specification are required depending upon which you’re using. (It would be possible to run the example on other Kubeflow installations too, but that would require additional credentials setup not covered in this tutorial).
### Install a Cloud AI Platform Pipelines cluster
You can create an AI Platform Pipelines installation with a few clicks. Access AI Platform Pipelines by visiting the [AI Platform Panel][27] in the [Cloud Console][28].
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/sA17BykJuzF.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/sA17BykJuzF.png" width="90%"/></a>
<figcaption><br/><i>Create a new Pipelines instance.</i></figcaption>
</figure>
See the [documentation][29] for more detail.
(You can also do this installation [from the command line][30] onto an existing GKE cluster if you prefer. If you do, for consistency with the UI installation, create the GKE cluster with `--scopes cloud-platform`).
### Or, install Kubeflow to use Kubeflow Pipelines
You can also run this example from a [Kubeflow][31] installation. For the example to work out of the box, you’ll need a Kubeflow on [GKE][32] installation, set up to use [IAP][33]. An easy way to do this is via the Kubeflow [‘click to deploy’ web app][34], or you can follow the command-line instructions [here][35].
### Upload and run the Tables end-to-end Pipeline
Once a Pipelines installation is running, we can upload the example AutoML Tables pipeline.
Click on **Pipelines** in the left nav bar of the Pipelines Dashboard. Click on **Upload Pipeline**.
- For Cloud AI Platform Pipelines, upload [`tables_pipeline_caip.py.tar.gz`][36], from this directory. This archive points to the compiled version of [this pipeline][37], specified and compiled using the [Kubeflow Pipelines SDK][38].
- For Kubeflow Pipelines on a Kubeflow installation, upload [`tables_pipeline_kf.py.tar.gz`][39]. This archive points to the compiled version of [this pipeline][40]. **To run this example on a KF installation, you will need to give the `<deployment-name>-user@<project-id>.iam.gserviceaccount.com` service account `AutoML Admin` privileges**.
> Note: The difference between the two pipelines relates to how GCP authentication is handled. For the Kubeflow pipeline, we’ve added `.apply(gcp.use_gcp_secret('user-gcp-sa'))` annotations to the pipeline steps. This tells the pipeline to use the mounted _secret_—set up during the installation process— that provides GCP account credentials. With the Cloud AI Platform Pipelines installation, the GKE cluster nodes have been set up to use the `cloud-platform` scope. With recent Kubeflow releases, specification of the mounted secret is no longer necessary, but we include both versions for compatibility.
The uploaded pipeline graph will look similar to this:
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-17%20at%204.27.41%20PM.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-17%20at%204.27.41%20PM.png" width="40%"/></a>
<figcaption><br/><i>The uploaded Tables 'end-to-end' pipeline.</i></figcaption>
</figure>
Click the **+Create Run** button to run the pipeline. You will need to fill in some pipeline parameters.
Specifically, replace `YOUR_PROJECT_HERE` with the name of your project; replace `YOUR_DATASET_NAME` with the name you want to give your new dataset (make it unique, and use letters, numbers and underscores up to 32 characters); and replace `YOUR_BUCKET_NAME` with the name of a [GCS bucket][41]. Do not include the `gs://` prefix— just enter the name. This bucket should be in the [same _region_][42] as that specified by the `gcp_region` parameter. E.g., if you keep the default `us-central1` region, your bucket should also be a _regional_ (not multi-regional) bucket in the `us-central1` region.
If you want to schedule a recurrent set of runs, you can do that instead. If your data is in [BigQuery][43]— as is the case for this example pipeline— and has a temporal aspect, you could define a _view_ to reflect that, e.g. to return data from a window over the last `N` days or hours. Then, the AutoML pipeline could specify ingestion of data from that view, grabbing an updated data window each time the pipeline is run, and building a new model based on that updated window.
## The steps executed by the pipeline
The example pipeline [creates a _dataset_][44], [imports][45] data into the dataset from a [BigQuery][46] _view_, and [trains][47] a custom model on that data. Then, it fetches [evaluation and metrics][48] information about the trained model, and based on specified criteria about model quality, uses that information to automatically determine whether to [deploy][49] the model for online prediction. We’ll take a closer look at each of the pipeline steps, and how they’re implemented.
### Create a Tables dataset and adjust its schema
This pipeline creates a new Tables _dataset_, and ingests data from a [BigQuery][50] table for the “bikes and weather” dataset described above. These actions are implemented by the first two steps in the pipeline (the `automl-create-dataset-for-tables` and `automl-import-data-for-tables` steps).
While we’re not showing it in this example, AutoML Tables supports ingestion from BigQuery _views_ as well as tables. This can be an easy way to do **_feature engineering_**: leverage BigQuery’s rich set of functions and operators to clean and transform your data before you ingest it.
When the data is ingested, AutoML Tables infers the _data type_ for each field (column). In some cases, those inferred types may not be what you want. For example, for our “bikes and weather” dataset, several ID fields (like the rental station IDs) are set by default to be numeric, but we want them treated as categorical when we train our model. In addition, we want to treat the `loc_cross` strings as categorical rather than text.
We make these adjustments programmatically, by defining a pipeline parameter that specifies the schema changes we want to make. Then, in the `automl-set-dataset-schema` pipeline step, for each indicated schema adjustment, we call `update_column_spec`:
```python
client.update_column_spec(
dataset_display_name=dataset_display_name,
column_spec_display_name=column_spec_display_name,
type_code=type_code,
nullable=nullable
)
```
Before we can train the model, we also need to specify the _target_ column— what we want our model to predict. In this case, we’ll train the model to predict rental _duration_. This is a numeric value, so we’ll be training a [regression][51] model.
```python
client.set_target_column(
dataset_display_name=dataset_display_name,
column_spec_display_name=target_column_spec_name
)
```
### Train a custom model on the dataset
Once the dataset is defined and its schema set properly, the pipeline will train the model. This happens in the `automl-create-model-for-tables` pipeline step. Via pipeline parameters, we can specify the training budget, the _optimization objective_ (if not using the default), and can additionally specify which columns to include or exclude from the model inputs.
You may want to specify a non-default optimization objective depending upon the characteristics of your dataset. [This table][52] describes the available optimization objectives and when you might want to use them. For example, if you were training a classification model using an imbalanced dataset, you might want to specify use of AUC PR (`MAXIMIZE_AU_PRC`), which optimizes results for predictions for the less common class.
```python
client.create_model(
model_display_name,
train_budget_milli_node_hours=train_budget_milli_node_hours,
dataset_display_name=dataset_display_name,
optimization_objective=optimization_objective,
include_column_spec_names=include_column_spec_names,
exclude_column_spec_names=exclude_column_spec_names,
)
```
### View model search information via Cloud Logging
You can view details about an AutoML Tables model [via Cloud Logging][53]. Using Logging, you can see the final model hyperparameters as well as the hyperparameters and object values used during model training and tuning.
An easy way to access these logs is to go to the [AutoML Tables page][54] in the Cloud Console. Select the Models tab in the left navigation pane and click on the model you’re interested in. Click the “Model” link to see the final hyperparameter logs. To see the tuning trial hyperparameters, click the “Trials” link.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%202.20.46%20PM.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%202.20.46%20PM.png" width="30%"/></a>
<figcaption><br/><i>View a model's search logs from its evaluation information.</i></figcaption>
</figure>
For example, here is a look at the Trials logs for a custom model trained on the “bikes and weather” dataset, with one of the entries expanded in the logs:
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%202.23.00%20PM.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%202.23.00%20PM.png" width="90%"/></a>
<figcaption><br/><i>The 'Trials' logs for a "bikes and weather" model</i></figcaption>
</figure>
### Custom model evaluation
Once your custom model has finished training, the pipeline moves on to its next step: model evaluation. We can access evaluation metrics via the API. We’ll use this information to decide whether or not to deploy the model.
These actions are factored into two steps. The process of fetching the evaluation information can be a general-purpose component (pipeline step) used in many situations; and then we’ll follow that with a more special-purpose step, that analyzes that information and uses it to decide whether or not to deploy the trained model.
In the first of these pipeline steps— the `automl-eval-tables-model` step— we’ll retrieve the evaluation and _global feature importance_ information.
```python
model = client.get_model(model_display_name=model_display_name)
feat_list = [(column.feature_importance, column.column_display_name)
for column in model.tables_model_metadata.tables_model_column_info]
evals = list(client.list_model_evaluations(model_display_name=model_display_name))
```
AutoML Tables automatically computes global feature importance for a trained model. This shows, across the evaluation set, the average absolute attribution each feature receives. Higher values mean the feature generally has greater influence on the model’s predictions.
This information is useful for debugging and improving your model. If a feature’s contribution is negligible—if it has a low value—you can simplify the model by excluding it from future training.
The pipeline step renders the global feature importance data as part of the pipeline run’s output:
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%201.22.42%20PM.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%201.22.42%20PM.png" width="50%"/></a>
<figcaption><br/><i>Global feature importance for the model inputs, rendered by a Kubeflow Pipeline step.</i></figcaption>
</figure>
For our example, based on the graphic above, we might try training a model without including `bike_id`.
In the following pipeline step— the `automl-eval-metrics` step— the evaluation output from the previous step is grabbed as input, and parsed to extract metrics that we’ll use in conjunction with pipeline parameters to decide whether or not to deploy the model. Note that this component is more special-purpose: unlike the other components in this pipeline, which support generalizable operations, this component— while it is parameterized— is specific in how it analyzes the evaluation info and decides whether or not to do the deployment.
One of the pipeline input parameters allows specification of metric thresholds. In this example, we’re training a regression model, and we’re specifying a `mean_absolute_error` (MAE) value as a threshold in the pipeline input parameters:
```python
{"mean_absolute_error": 450}
```
The `automl-eval-metrics` pipeline step compares the model evaluation information to the given threshold constraints. In this case, if the MAE is greater than `450`, the model will not be deployed. The pipeline step outputs that decision, and displays the evaluation information it’s using as part of the pipeline run’s output:
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%202.07.21%20PM.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-23%20at%202.07.21%20PM.png" width="25%"/></a>
<figcaption><br/><i>Information about a model's evaluation, rendered by a Kubeflow Pipeline step.</i></figcaption>
</figure>
### (Conditional) model deployment
You can _deploy_ any of your custom Tables models to make them accessible for online prediction requests.
The pipeline code uses a _conditional test_ to determine whether or not to run the step that deploys the model, based on the output of the evaluation step described above:
```python
with dsl.Condition(eval_metrics.outputs['deploy'] == True):
deploy_model = deploy_model_op( ... )
```
Only if the model meets the given criteria, will the deployment step (called `automl-deploy-tables-model`) be run, and the model be deployed automatically as part of the pipeline run:
```python
response = client.deploy_model(model_display_name=model_display_name)
```
You can always deploy a model later if you like.
### Putting it together: The full pipeline execution
The figure below shows the result of a pipeline run. In this case, the conditional step was executed— based on the model evaluation metrics— and the trained model was deployed.
Via the UI, you can view outputs, logs for each step, run artifacts and lineage information, and more. See [this post][55] for more detail.
++TODO: replace the following figure with something better++
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-17%20at%204.28.32%20PM.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_e2e/Screen%20Shot%202020-03-17%20at%204.28.32%20PM.png" width="40%"/></a>
<figcaption><br/><i>Execution of a pipeline run. You can view outputs, logs for each step, run artifacts and lineage information, and more.</i></figcaption>
</figure>
## Getting explanations about your model’s predictions
Once a model is deployed, you can request predictions from that model. You can additionally request _explanations for local feature importance_: a score showing how much (and in which direction) each feature influenced the prediction for a single example. See [this blog post][56] for more information on how those values are calculated.
Here is a [notebook example][57] of how to request a prediction and its explanation using the Python client libraries.
```python
from google.cloud import automl_v1beta1 as automl
client = automl.TablesClient(project=PROJECT_ID, region=REGION)
response = client.predict(
model_display_name=model_display_name,
inputs=inputs,
feature_importance=True,
)
```
The prediction response will have a structure like [this][58]. (The notebook above shows how to visualize the local feature importance results using `matplotlib`.)
It’s easy to explore local feature importance through the Cloud Console’s [AutoML Tables UI][59] as well. After you deploy a model, go to the **TEST & USE** tab of the Tables panel, select **ONLINE PREDICTION**, enter the field values for the prediction, and then check the **Generate feature importance** box at the bottom of the page. The result will show the feature importance values as well as the prediction. This [blog post][60] gives some examples of how these explanations can be used to find potential issues with your data or help you better understand your problem domain.
## The AutoML Tables UI in the Cloud Console
With this example we’ve focused on how you can automate a Tables workflow using Kubeflow pipelines and the Python client libraries.
All of the pipeline steps can also be accomplished via the [AutoML Tables UI][61] in the Cloud Console, including many useful visualizations, and other functionality not implemented by this example pipeline— such as the ability to export the model’s test set and prediction results to BigQuery for further analysis.
## Export the trained model and serve it on a GKE cluster
Recently, Tables launched a feature to let you export your full custom model, packaged so that you can serve it via a Docker container. (Under the hood, it is using TensorFlow Serving). This lets you serve your models anywhere that you can run a container, including a GKE cluster.
This means that you can run a model serving service on your AI Platform Pipelines or Kubeflow installation, both of which run on GKE.
[This blog post][62] walks through the steps to serve the exported model (in this case, using [Cloud Run][63]). Follow the instructions in the post through the “View information about your exported model in TensorBoard” [section][64].
Here, we’ll diverge from the rest of the post and create a GKE service instead.
Make a copy of the [`deploy_model_for_tables/model_serve_template.yaml`][65] file and name it `model_serve.yaml`. Edit this new file, **replacing** `MODEL_NAME` with some meaningful name for your model, `IMAGE_NAME` with the name of the container image you built (as described in the [blog post][66]), and `NAMESPACE` with the namespace in which you want to run your service (e.g. `default`).
Then, from the command line, run:
```bash
kubectl apply -f model_serve.yaml
```
to set up your model serving _service_ and its underlying _deployment_. (Before you do that, make sure that kubectl is set to use your GKE cluster’s credentials. One way to do that is to visit the [GKE panel in the Cloud Console][67], and click **Connect** for that cluster.)
You can later take down the service and its deployment by running:
```bash
kubectl delete -f model_serve.yaml
```
### Send prediction requests to your deployed model service
Once your model serving service is deployed, you can send prediction requests to it. Because we didn’t set up an external endpoint for our service in this simple example, we’ll connect to the service via port forwarding.
From the command line, run the following, **replacing** `<your-model-name>` with the value you substituted for `MODEL_NAME` when creating your `yaml` file, and `<service-namespace>` with the namespace in which your service is running— the same namespace value you used in the yaml file.
```bash
kubectl -n <service-namespace> port-forward svc/<your-model-name> 8080:80
```
Then, from the `deploy_model_for_tables` directory, send a prediction request to your service like this:
```bash
curl -X POST --data @./instances.json http://localhost:8080/predict
```
You should see a result like this, with a prediction for each instance in the `instances.json` file:
```bash
{"predictions": [860.79833984375, 460.5323486328125, 1211.7664794921875]}
```
(If you get an error, make sure you’re in the correct directory and see the `instances.json` file listed).
> **Note**: it would be possible to add this deployment step to the pipeline too. (See [`deploy_model_for_tables/exported_model_deploy.py`][68]). However, the [Python client library][69] does not yet support the ‘export’ operation. Once export is supported by the client library, this would be a natural addition to the workflow. While not tested, it should also be possible to do the export programmatically via the [REST API][70].
## A deeper dive into the pipeline code
The updated [Tables Python client library][71] makes it very straightforward to build the Pipelines components that support each stage of the workflow.
Kubeflow Pipeline steps are container-based, so that any action you can support via a Docker container image can become a pipeline step.
That doesn’t mean that an end-user necessarily needs to have Docker installed. For many straightforward cases, building your pipeline steps does not require it.
### Using the ‘lightweight python components’ functionality to build pipeline steps
For most of the components in this example, we’re building them using the [“lightweight python components”][72] functionality as shown in [this example notebook][73], including compilation of the code into a component package. This feature allows you to create components based on Python functions, building on an appropriate base image, so that you do not need to have docker installed or rebuild a container image each time your code changes.
Each component’s python file includes a function definition, and then a `func_to_container_op` call, passing the function definition, to generate the component’s `yaml` package file. As we’ll see below, these component package files make it very straightforward to put these steps together to form a pipeline.
The [`deploy_model_for_tables/tables_deploy_component.py`][74] file is representative. It contains an `automl_deploy_tables_model` function definition.
```
def automl_deploy_tables_model(
gcp_project_id: str,
gcp_region: str,
model_display_name: str,
api_endpoint: str = None,
) -> NamedTuple('Outputs', [('model_display_name', str), ('status', str)]):
...
return (model_display_name, status)
```
The function defines the component’s inputs and outputs, and this information will be used to support static checking when we compose these components to build the pipeline.
To build the component `yaml` file corresponding to this function, we add the following to the components’ Python script, then can run `python <filename>.py` from the command line to generate it (you must have the Kubeflow Pipelines (KFP) sdk [installed][75]).
```python
if __name__ == '__main__':
import kfp
kfp.components.func_to_container_op(
automl_deploy_tables_model, output_component_file='tables_deploy_component.yaml',
base_image='python:3.7')
```
Whenever you change the python function definition, just recompile to regenerate the corresponding component file.
### Specifying the Tables pipeline
With the components packaged into `yaml` files, it becomes very straightforward to specify a pipeline, such as [`tables_pipeline_caip.py`][76], that uses them. Here, we’re just using the `load_component_from_file()` method, since the `yaml` files are all local (in the same repo). However, there is also a `load_component_from_url()` method, which makes it easy to share components. (If your URL points to a file in GitHub, be sure to use raw mode).
```python
create_dataset_op = comp.load_component_from_file(
'./create_dataset_for_tables/tables_component.yaml')
import_data_op = comp.load_component_from_file(
'./import_data_from_bigquery/tables_component.yaml')
set_schema_op = comp.load_component_from_file(
'./import_data_from_bigquery/tables_schema_component.yaml')
train_model_op = comp.load_component_from_file(
'./create_model_for_tables/tables_component.yaml')
eval_model_op = comp.load_component_from_file(
'./create_model_for_tables/tables_eval_component.yaml')
eval_metrics_op = comp.load_component_from_file(
'./create_model_for_tables/tables_eval_metrics_component.yaml')
deploy_model_op = comp.load_component_from_file(
'./deploy_model_for_tables/tables_deploy_component.yaml')
```
Once all our pipeline ops (steps) are defined using the component definitions, then we can specify the pipeline by calling the constructors, e.g.:
```python
create_dataset = create_dataset_op(
gcp_project_id=gcp_project_id,
gcp_region=gcp_region,
dataset_display_name=dataset_display_name,
api_endpoint=api_endpoint,
)
```
If a pipeline component has been defined to have outputs, other components can access those outputs. E.g., here, the ‘eval’ step is grabbing an output from the ‘train’ step, specifically information about the model display name:
```python
eval_model = eval_model_op(
gcp_project_id=gcp_project_id,
gcp_region=gcp_region,
bucket_name=bucket_name,
gcs_path='automl_evals/{}'.format(dsl.RUN_ID_PLACEHOLDER),
api_endpoint=api_endpoint,
model_display_name=train_model.outputs['model_display_name']
)
```
In this manner it is straightforward to put together a pipeline from your component definitions. Just don’t forget to recompile the pipeline script (to generate its corresponding `.tar.gz` archive) if any of its component definitions changed, e.g. `python tables_pipeline_caip.py`.
[1]: https://cloud.google.com/automl-tables/docs/
[2]: https://googleapis.dev/python/automl/latest/gapic/v1beta1/tables.html
[3]: https://cloud.google.com/blog/products/ai-machine-learning/explaining-model-predictions-structured-data
[4]: http://amygdala.github.io/automl/ml/2019/12/05/automl_tables_export.html
[5]: https://cloud.google.com/automl-tables/docs/logging
[6]: https://cloud.google.com/blog/products/ai-machine-learning/introducing-cloud-ai-platform-pipelines
[7]: https://cloud.google.com/automl-tables/docs/import#create
[8]: https://cloud.google.com/automl-tables/docs/import#import-data
[9]: https://cloud.google.com/bigquery
[10]: https://cloud.google.com/automl-tables/docs/train
[11]: https://cloud.google.com/automl-tables/docs/evaluate
[12]: https://cloud.google.com/automl-tables/docs/predict
[13]: https://cloud.google.com/blog/products/ai-machine-learning/explaining-model-predictions-structured-data
[14]: https://console.cloud.google.com/automl-tables
[15]: https://github.com/amygdala/code-snippets/blob/master/ml/automl/tables/xai/automl_tables_xai.ipynb
[16]: http://amygdala.github.io/kubeflow/ml/2019/08/22/remote-deploy.html#using-cloud-function-triggers
[17]: https://cloud.google.com/storage
[18]: https://cloud.google.com/bigquery/public-data/
[19]: https://cloud.google.com/bigquery/
[20]: https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=london_bicycles&page=dataset
[21]: https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=noaa_gsod&page=dataset
[22]: https://cloud.google.com/blog/products/ai-machine-learning/explaining-model-predictions-structured-data
[23]: https://cloud.google.com/ai-platform/pipelines/docs
[24]: https://kubeflow.org/
[25]: https://www.kubeflow.org/docs/gke/deploy/
[26]: https://cloud.google.com/blog/products/ai-machine-learning/introducing-cloud-ai-platform-pipelines
[27]: https://console.cloud.google.com/ai-platform/pipelines/clusters
[28]: https://console.cloud.google.com
[29]: https://cloud.google.com/ai-platform/pipelines/docs
[30]: https://github.com/kubeflow/pipelines/tree/master/manifests/gcp_marketplace
[31]: https://www.kubeflow.org/
[32]: https://cloud.google.com/kubernetes-engine
[33]: https://cloud.google.com/iap
[34]: https://deploy.kubeflow.cloud/#/deploy
[35]: https://www.kubeflow.org/docs/gke/deploy/deploy-cli/
[36]: ./tables_pipeline_caip.py.tar.gz
[37]: ./tables_pipeline_caip.py
[38]: https://www.kubeflow.org/docs/pipelines/sdk/install-sdk/
[39]: ./tables_pipeline_kf.py.tar.gz
[40]: ./tables_pipeline_kf.py
[41]: https://cloud.google.com/storage
[42]: https://cloud.google.com/automl-tables/docs/locations#buckets
[43]: https://cloud.google.com/bigquery
[44]: https://cloud.google.com/automl-tables/docs/import#create
[45]: https://cloud.google.com/automl-tables/docs/import#import-data
[46]: https://cloud.google.com/bigquery
[47]: https://cloud.google.com/automl-tables/docs/train
[48]: https://cloud.google.com/automl-tables/docs/evaluate
[49]: https://cloud.google.com/automl-tables/docs/predict
[50]: https://cloud.google.com/bigquery
[51]: https://cloud.google.com/automl-tables/docs/problem-types#regression_problems
[52]: https://cloud.google.com/automl-tables/docs/train#opt-obj
[53]: https://cloud.google.com/automl-tables/docs/logging
[54]: https://console.cloud.google.com/automl-tables
[55]: https://cloud.google.com/blog/products/ai-machine-learning/introducing-cloud-ai-platform-pipelines
[56]: https://cloud.google.com/blog/products/ai-machine-learning/explaining-model-predictions-structured-data
[57]: https://github.com/amygdala/code-snippets/blob/master/ml/automl/tables/xai/automl_tables_xai.ipynb
[58]: https://gist.github.com/amygdala/c96d45bdf694737d77d91597ca3ef1f0
[59]: https://console.cloud.google.com/automl-tables
[60]: https://cloud.google.com/blog/products/ai-machine-learning/explaining-model-predictions-structured-data
[61]: https://console.cloud.google.com/automl-tables
[62]: http://amygdala.github.io/automl/ml/2019/12/05/automl_tables_export.html
[63]: https://cloud.google.com/run
[64]: http://amygdala.github.io/automl/ml/2019/12/05/automl_tables_export.html#view-information-about-your-exported-model-in-tensorboard
[65]: ./deploy_model_for_tables/model_serve_template.yaml
[66]: http://amygdala.github.io/automl/ml/2019/12/05/automl_tables_export.html
[67]: https://console.cloud.google.com/kubernetes/list
[68]: ./deploy_model_for_tables/exported_model_deploy.py
[69]: https://googleapis.dev/python/automl/latest/gapic/v1beta1/tables.html
[70]: https://cloud.google.com/automl/docs/reference/rest/v1/projects.locations.models/export
[71]: https://googleapis.dev/python/automl/latest/gapic/v1beta1/tables.html
[72]: https://www.kubeflow.org/docs/pipelines/sdk/lightweight-python-components/
[73]: https://github.com/kubeflow/pipelines/blob/master/samples/tutorials/mnist/01_Lightweight_Python_Components.ipynb
[74]: ./deploy_model_for_tables/tables_deploy_component.py
[75]: https://www.kubeflow.org/docs/pipelines/sdk/install-sdk/
[76]: ./tables_pipeline_caip.py
================================================
FILE: ml/automl/tables/kfp_e2e/create_dataset_for_tables/tables_component.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import NamedTuple
def automl_create_dataset_for_tables(
    gcp_project_id: str,
    gcp_region: str,
    dataset_display_name: str,
    api_endpoint: str = None,
    tables_dataset_metadata: dict = {},
) -> NamedTuple('Outputs', [('dataset_path', str), ('create_time', str), ('dataset_id', str)]):
    """Create an AutoML Tables dataset and report its identifiers.

    This is a self-contained Kubeflow Pipelines "lightweight" component: all
    imports live inside the function body, and the client libraries are
    pip-installed when the step runs.

    Args:
        gcp_project_id: Project passed to the AutoML Tables client.
        gcp_region: Region passed to the AutoML Tables client.
        dataset_display_name: Display name for the new dataset.
        api_endpoint: Optional API endpoint override. When falsy, the client
            is built without explicit client options.
        tables_dataset_metadata: Metadata handed to `create_dataset`. This
            function only passes it through; it does not modify it.

    Returns:
        A `(dataset_path, create_time, dataset_id)` tuple of strings, where
        `dataset_path` is the dataset's full resource name and `dataset_id`
        is the last `/`-separated segment of that name.

    Raises:
        subprocess.CalledProcessError: If one of the pip installs fails.
        google.api_core.exceptions.GoogleAPICallError: If dataset creation
            fails (logged as a warning, then re-raised).
    """
    import os
    import subprocess
    import sys

    # `env=` replaces the child's entire environment, so start from a copy of
    # ours; otherwise pip loses proxy, index, HOME, and similar settings.
    pip_env = dict(os.environ, PIP_DISABLE_PIP_VERSION_CHECK='1')
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
         '--no-warn-script-location'],
        env=pip_env, check=True)
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
         '--quiet', '--no-warn-script-location'],
        env=pip_env, check=True)

    import logging
    # Import the exceptions module explicitly rather than relying on it being
    # loaded as a side effect of other google.* imports.
    from google.api_core import exceptions
    from google.api_core.client_options import ClientOptions
    from google.cloud import automl_v1beta1 as automl

    logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable
    # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
    # in that case, instead of requiring endpoint to be specified.
    if api_endpoint:
        client_options = ClientOptions(api_endpoint=api_endpoint)
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
                                     client_options=client_options)
    else:
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region)

    # Only the API call itself is guarded; everything after it is local work.
    try:
        # Create a dataset with the given display name
        dataset = client.create_dataset(dataset_display_name, metadata=tables_dataset_metadata)
    except exceptions.GoogleAPICallError as e:
        logging.warning(e)
        raise  # bare raise keeps the original traceback

    dataset_id = dataset.name.rsplit('/', 1)[-1]

    # Log info about the created dataset
    logging.info("Dataset name: {}".format(dataset.name))
    logging.info("Dataset id: {}".format(dataset_id))
    logging.info("Dataset display name: {}".format(dataset.display_name))
    logging.info("Dataset metadata:")
    logging.info("\t{}".format(dataset.tables_dataset_metadata))
    logging.info("Dataset example count: {}".format(dataset.example_count))
    logging.info("Dataset create time:")
    logging.info("\tseconds: {}".format(dataset.create_time.seconds))
    logging.info("\tnanos: {}".format(dataset.create_time.nanos))
    print(str(dataset))

    return (dataset.name, str(dataset.create_time), dataset_id)
if __name__ == '__main__':
    # Running this file as a script (re)generates the component's package
    # file from the function definition above.
    import kfp

    kfp.components.func_to_container_op(
        automl_create_dataset_for_tables,
        base_image='python:3.7',
        output_component_file='tables_component.yaml',
    )
================================================
FILE: ml/automl/tables/kfp_e2e/create_dataset_for_tables/tables_component.yaml
================================================
# Kubeflow Pipelines component spec for the `automl_create_dataset_for_tables` step.
# Generated from tables_component.py in this directory via
# kfp.components.func_to_container_op; regenerate it with
# `python tables_component.py` rather than editing it by hand.
name: Automl create dataset for tables
# Inputs mirror the function's parameters; those marked `optional: true` are
# the parameters that have default values in the Python signature.
inputs:
- name: gcp_project_id
type: String
- name: gcp_region
type: String
- name: dataset_display_name
type: String
- name: api_endpoint
type: String
optional: true
# Passed on the command line as a JSON string and parsed with json.loads.
- name: tables_dataset_metadata
type: JsonObject
default: '{}'
optional: true
# Outputs mirror the fields of the function's `Outputs` NamedTuple annotation.
outputs:
- name: dataset_path
type: String
- name: create_time
type: String
- name: dataset_id
type: String
implementation:
container:
image: python:3.7
command:
- python3
- -u
- -c
- |
from typing import NamedTuple
def automl_create_dataset_for_tables(
gcp_project_id: str,
gcp_region: str,
dataset_display_name: str,
api_endpoint: str = None,
tables_dataset_metadata: dict = {},
) -> NamedTuple('Outputs', [('dataset_path', str), ('create_time', str), ('dataset_id', str)]):
import sys
import subprocess
subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
'--no-warn-script-location'],
env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
'--quiet', '--no-warn-script-location'],
env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
import google
import logging
from google.api_core.client_options import ClientOptions
from google.cloud import automl_v1beta1 as automl
logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable
# TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
# in that case, instead of requiring endpoint to be specified.
if api_endpoint:
client_options = ClientOptions(api_endpoint=api_endpoint)
client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
client_options=client_options)
else:
client = automl.TablesClient(project=gcp_project_id, region=gcp_region)
try:
# Create a dataset with the given display name
dataset = client.create_dataset(dataset_display_name, metadata=tables_dataset_metadata)
# Log info about the created dataset
logging.info("Dataset name: {}".format(dataset.name))
logging.info("Dataset id: {}".format(dataset.name.split("/")[-1]))
logging.info("Dataset display name: {}".format(dataset.display_name))
logging.info("Dataset metadata:")
logging.info("\t{}".format(dataset.tables_dataset_metadata))
logging.info("Dataset example count: {}".format(dataset.example_count))
logging.info("Dataset create time:")
logging.info("\tseconds: {}".format(dataset.create_time.seconds))
logging.info("\tnanos: {}".format(dataset.create_time.nanos))
print(str(dataset))
dataset_id = dataset.name.rsplit('/', 1)[-1]
return (dataset.name, str(dataset.create_time), dataset_id)
except google.api_core.exceptions.GoogleAPICallError as e:
logging.warning(e)
raise e
import json
def _serialize_str(str_value: str) -> str:
if not isinstance(str_value, str):
raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
return str_value
import argparse
_parser = argparse.ArgumentParser(prog='Automl create dataset for tables', description='')
_parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--dataset-display-name", dest="dataset_display_name", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--api-endpoint", dest="api_endpoint", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--tables-dataset-metadata", dest="tables_dataset_metadata", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=3)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = automl_create_dataset_for_tables(**_parsed_args)
if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
_outputs = [_outputs]
_output_serializers = [
_serialize_str,
_serialize_str,
_serialize_str,
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --gcp-project-id
- inputValue: gcp_project_id
- --gcp-region
- inputValue: gcp_region
- --dataset-display-name
- inputValue: dataset_display_name
- if:
cond:
isPresent: api_endpoint
then:
- --api-endpoint
- inputValue: api_endpoint
- if:
cond:
isPresent: tables_dataset_metadata
then:
- --tables-dataset-metadata
- inputValue: tables_dataset_metadata
- '----output-paths'
- outputPath: dataset_path
- outputPath: create_time
- outputPath: dataset_id
================================================
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_component.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import NamedTuple
def automl_create_model_for_tables(
    gcp_project_id: str,
    gcp_region: str,
    dataset_display_name: str,
    api_endpoint: str = None,
    model_display_name: str = None,
    model_prefix: str = 'bwmodel',
    optimization_objective: str = None,
    include_column_spec_names: list = None,
    exclude_column_spec_names: list = None,
    train_budget_milli_node_hours: int = 1000,
) -> NamedTuple('Outputs', [('model_display_name', str), ('model_name', str), ('model_id', str)]):
    """Train an AutoML Tables model on an existing dataset and wait for it.

    This is a self-contained Kubeflow Pipelines "lightweight" component: all
    imports live inside the function body, and the client libraries are
    pip-installed when the step runs.

    Args:
        gcp_project_id: Project passed to the AutoML Tables client.
        gcp_region: Region passed to the AutoML Tables client.
        dataset_display_name: Display name of the dataset to train on.
        api_endpoint: Optional API endpoint override. When falsy, the client
            is built without explicit client options.
        model_display_name: Display name for the model. When falsy, one is
            generated as `<model_prefix>_<current unix time in seconds>`.
        model_prefix: Prefix used only when a display name is generated.
        optimization_objective: Forwarded unchanged to `create_model`.
        include_column_spec_names: Forwarded unchanged to `create_model`.
        exclude_column_spec_names: Forwarded unchanged to `create_model`.
        train_budget_milli_node_hours: Forwarded unchanged to `create_model`.

    Returns:
        A `(model_display_name, model_name, model_id)` tuple of strings, where
        `model_name` is the trained model's full resource name and `model_id`
        is the last `/`-separated segment of that name.

    Raises:
        subprocess.CalledProcessError: If one of the pip installs fails.
    """
    import os
    import subprocess
    import sys

    # we could build a base image that includes these libraries if we don't want to do
    # the dynamic installation when the step runs.
    # `env=` replaces the child's entire environment, so start from a copy of
    # ours; otherwise pip loses proxy, index, HOME, and similar settings.
    pip_env = dict(os.environ, PIP_DISABLE_PIP_VERSION_CHECK='1')
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
         '--no-warn-script-location'],
        env=pip_env, check=True)
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
         '--quiet', '--no-warn-script-location'],
        env=pip_env, check=True)

    import logging
    import time
    from google.api_core.client_options import ClientOptions
    from google.cloud import automl_v1beta1 as automl

    logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable
    # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
    # in that case, instead of requiring endpoint to be specified.
    if api_endpoint:
        client_options = ClientOptions(api_endpoint=api_endpoint)
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
                                     client_options=client_options)
    else:
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region)

    if not model_display_name:
        # No explicit name given: derive one from the prefix and the clock.
        model_display_name = '{}_{}'.format(model_prefix, str(int(time.time())))

    logging.info('Training model {}...'.format(model_display_name))
    response = client.create_model(
        model_display_name,
        train_budget_milli_node_hours=train_budget_milli_node_hours,
        dataset_display_name=dataset_display_name,
        optimization_objective=optimization_objective,
        include_column_spec_names=include_column_spec_names,
        exclude_column_spec_names=exclude_column_spec_names,
    )

    logging.info("Training operation: {}".format(response.operation))
    logging.info("Training operation name: {}".format(response.operation.name))
    logging.info("Training in progress. This operation may take multiple hours to complete.")
    # block termination of the op until training is finished.
    result = response.result()
    logging.info("Training completed: {}".format(result))

    model_name = result.name
    model_id = model_name.rsplit('/', 1)[-1]
    print('model name: {}, model id: {}'.format(model_name, model_id))
    return (model_display_name, model_name, model_id)
if __name__ == '__main__':
    # Running this file as a script (re)generates the component's package
    # file from the function definition above.
    import kfp

    kfp.components.func_to_container_op(
        automl_create_model_for_tables,
        base_image='python:3.7',
        output_component_file='tables_component.yaml',
    )
================================================
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_component.yaml
================================================
# Kubeflow Pipelines component spec for the `automl_create_model_for_tables` step.
# Generated from tables_component.py in this directory via
# kfp.components.func_to_container_op; regenerate it with
# `python tables_component.py` rather than editing it by hand.
name: Automl create model for tables
# Inputs mirror the function's parameters; those marked `optional: true` are
# the parameters that have default values in the Python signature.
inputs:
- name: gcp_project_id
type: String
- name: gcp_region
type: String
- name: dataset_display_name
type: String
- name: api_endpoint
type: String
optional: true
- name: model_display_name
type: String
optional: true
# Only used by the function when model_display_name is not supplied.
- name: model_prefix
type: String
default: bwmodel
optional: true
- name: optimization_objective
type: String
optional: true
# The two column-spec lists are passed as JSON strings and parsed with json.loads.
- name: include_column_spec_names
type: JsonArray
optional: true
- name: exclude_column_spec_names
type: JsonArray
optional: true
- name: train_budget_milli_node_hours
type: Integer
default: '1000'
optional: true
# Outputs mirror the fields of the function's `Outputs` NamedTuple annotation.
outputs:
- name: model_display_name
type: String
- name: model_name
type: String
- name: model_id
type: String
implementation:
container:
image: python:3.7
command:
- python3
- -u
- -c
- |
from typing import NamedTuple
def automl_create_model_for_tables(
gcp_project_id: str,
gcp_region: str,
dataset_display_name: str,
api_endpoint: str = None,
model_display_name: str = None,
model_prefix: str = 'bwmodel',
optimization_objective: str = None,
include_column_spec_names: list = None,
exclude_column_spec_names: list = None,
train_budget_milli_node_hours: int = 1000,
) -> NamedTuple('Outputs', [('model_display_name', str), ('model_name', str), ('model_id', str)]):
import subprocess
import sys
# we could build a base image that includes these libraries if we don't want to do
# the dynamic installation when the step runs.
subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0', '--no-warn-script-location'],
env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0', '--quiet', '--no-warn-script-location'],
env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
import google
import logging
from google.api_core.client_options import ClientOptions
from google.cloud import automl_v1beta1 as automl
import time
logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable
# TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
# in that case, instead of requiring endpoint to be specified.
if api_endpoint:
client_options = ClientOptions(api_endpoint=api_endpoint)
client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
client_options=client_options)
else:
client = automl.TablesClient(project=gcp_project_id, region=gcp_region)
if not model_display_name:
model_display_name = '{}_{}'.format(model_prefix, str(int(time.time())))
logging.info('Training model {}...'.format(model_display_name))
response = client.create_model(
model_display_name,
train_budget_milli_node_hours=train_budget_milli_node_hours,
dataset_display_name=dataset_display_name,
optimization_objective=optimization_objective,
include_column_spec_names=include_column_spec_names,
exclude_column_spec_names=exclude_column_spec_names,
)
logging.info("Training operation: {}".format(response.operation))
logging.info("Training operation name: {}".format(response.operation.name))
logging.info("Training in progress. This operation may take multiple hours to complete.")
# block termination of the op until training is finished.
result = response.result()
logging.info("Training completed: {}".format(result))
model_name = result.name
model_id = model_name.rsplit('/', 1)[-1]
print('model name: {}, model id: {}'.format(model_name, model_id))
return (model_display_name, model_name, model_id)
import json
def _serialize_str(str_value: str) -> str:
if not isinstance(str_value, str):
raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
return str_value
import argparse
_parser = argparse.ArgumentParser(prog='Automl create model for tables', description='')
_parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--dataset-display-name", dest="dataset_display_name", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--api-endpoint", dest="api_endpoint", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model-display-name", dest="model_display_name", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model-prefix", dest="model_prefix", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--optimization-objective", dest="optimization_objective", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--include-column-spec-names", dest="include_column_spec_names", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--exclude-column-spec-names", dest="exclude_column_spec_names", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--train-budget-milli-node-hours", dest="train_budget_milli_node_hours", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=3)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = automl_create_model_for_tables(**_parsed_args)
if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
_outputs = [_outputs]
_output_serializers = [
_serialize_str,
_serialize_str,
_serialize_str,
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --gcp-project-id
- inputValue: gcp_project_id
- --gcp-region
- inputValue: gcp_region
- --dataset-display-name
- inputValue: dataset_display_name
- if:
cond:
isPresent: api_endpoint
then:
- --api-endpoint
- inputValue: api_endpoint
- if:
cond:
isPresent: model_display_name
then:
- --model-display-name
- inputValue: model_display_name
- if:
cond:
isPresent: model_prefix
then:
- --model-prefix
- inputValue: model_prefix
- if:
cond:
isPresent: optimization_objective
then:
- --optimization-objective
- inputValue: optimization_objective
- if:
cond:
isPresent: include_column_spec_names
then:
- --include-column-spec-names
- inputValue: include_column_spec_names
- if:
cond:
isPresent: exclude_column_spec_names
then:
- --exclude-column-spec-names
- inputValue: exclude_column_spec_names
- if:
cond:
isPresent: train_budget_milli_node_hours
then:
- --train-budget-milli-node-hours
- inputValue: train_budget_milli_node_hours
- '----output-paths'
- outputPath: model_display_name
- outputPath: model_name
- outputPath: model_id
================================================
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_component.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import NamedTuple
from kfp.components import InputPath, OutputPath
def automl_eval_tables_model(
    gcp_project_id: str,
    gcp_region: str,
    model_display_name: str,
    bucket_name: str,
    gcs_path: str,
    eval_data_path: OutputPath('evals'),
    mlpipeline_ui_metadata_path: OutputPath('UI_metadata'),
    api_endpoint: str = None,
) -> NamedTuple('Outputs', [
    ('feat_list', str)]):
    """Fetch evaluation info for a trained AutoML Tables model.

    This is a self-contained Kubeflow Pipelines "lightweight" component: all
    imports live inside the function body, and the client libraries are
    pip-installed when the step runs.

    The step does three things:
      * looks up the model and logs its details, including its features
        sorted by descending global importance;
      * renders a feature-importance bar chart, uploads the image (made
        public) and a small HTML wrapper to GCS, and writes the UI metadata
        file pointing at that HTML;
      * pickles the list of model evaluations to `eval_data_path`.

    Args:
        gcp_project_id: Project passed to the AutoML Tables client.
        gcp_region: Region passed to the AutoML Tables client.
        model_display_name: Display name of the model to evaluate.
        bucket_name: GCS bucket that receives the chart image and HTML page.
        gcs_path: Object-name prefix inside `bucket_name` for those uploads.
        eval_data_path: Local file path that receives the pickled evaluations.
        mlpipeline_ui_metadata_path: Local file path that receives the
            UI metadata JSON.
        api_endpoint: Optional API endpoint override. When falsy, the client
            is built without explicit client options.

    Returns:
        A JSON string encoding the list of `(feature_importance,
        column_display_name)` pairs, sorted by descending importance, or
        `"null"` if the model lookup reported NotFound.

    Raises:
        subprocess.CalledProcessError: If one of the pip installs fails.
    """
    import os
    import subprocess
    import sys

    # we could build a base image that includes these libraries if we don't want to do
    # the dynamic installation when the step runs.
    # `env=` replaces the child's entire environment, so start from a copy of
    # ours; otherwise pip loses proxy, index, HOME, and similar settings.
    pip_env = dict(os.environ, PIP_DISABLE_PIP_VERSION_CHECK='1')
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
         '--no-warn-script-location'],
        env=pip_env, check=True)
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
         '--no-warn-script-location'],
        env=pip_env, check=True)
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install',
         'matplotlib', 'pathlib2', 'google-cloud-storage',
         '--no-warn-script-location'],
        env=pip_env, check=True)

    import json
    import logging
    import pickle
    import tempfile

    import pathlib2
    from google.api_core.client_options import ClientOptions
    from google.api_core import exceptions
    from google.cloud import automl_v1beta1 as automl
    from google.cloud.automl_v1beta1 import enums
    from google.cloud import storage

    def upload_blob(bucket_name, source_file_name, destination_blob_name,
                    public_url=False):
        """Uploads a file to the bucket.

        Returns the blob's public URL when `public_url` is true, else None.
        """
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)

        blob.upload_from_filename(source_file_name)

        logging.info("File {} uploaded to {}.".format(
            source_file_name, destination_blob_name))
        if public_url:
            # NOTE: this makes the uploaded object world-readable.
            blob.make_public()
            logging.info("Blob {} is publicly accessible at {}".format(
                blob.name, blob.public_url))
            return blob.public_url
        return None

    def get_model_details(client, model_display_name):
        """Log model details and return `(model, feat_list)`.

        Returns `(None, None)` when the model lookup raises NotFound.
        """
        # A single lookup is enough; the result is reused below.
        try:
            model = client.get_model(model_display_name=model_display_name)
        except exceptions.NotFound:
            logging.info("Model %s not found.", model_display_name)
            return (None, None)

        # Retrieve deployment state.
        if model.deployment_state == enums.Model.DeploymentState.DEPLOYED:
            deployment_state = "deployed"
        else:
            deployment_state = "undeployed"

        # get features of top global importance
        feat_list = [
            (column.feature_importance, column.column_display_name)
            for column in model.tables_model_metadata.tables_model_column_info
        ]
        feat_list.sort(reverse=True)

        # Log some information about the model
        logging.info("Model name: {}".format(model.name))
        logging.info("Model id: {}".format(model.name.split("/")[-1]))
        logging.info("Model display name: {}".format(model.display_name))
        logging.info("Features of top importance:")
        # Slicing already copes with lists shorter than ten entries.
        for feat in feat_list[:10]:
            logging.info(feat)
        logging.info("Model create time:")
        logging.info("\tseconds: {}".format(model.create_time.seconds))
        logging.info("\tnanos: {}".format(model.create_time.nanos))
        logging.info("Model deployment state: {}".format(deployment_state))

        generate_fi_ui(feat_list)
        return (model, feat_list)

    def generate_fi_ui(feat_list):
        """Render and upload the feature-importance chart; write UI metadata."""
        import matplotlib.pyplot as plt

        # Comprehensions (rather than zip(*...) plus indexing) also handle an
        # empty feature list, which then simply yields an empty chart.
        importances = [importance for importance, _ in feat_list]
        names = [name for _, name in feat_list]
        y_pos = list(range(len(names)))
        plt.figure(figsize=(10, 6))
        plt.barh(y_pos, importances, alpha=0.5)
        plt.yticks(y_pos, names)

        image_suffix = '{}/gfi.png'.format(gcs_path)
        html_suffix = '{}/gfi.html'.format(gcs_path)
        # Stage the files in a private temp dir instead of the filesystem
        # root, which may not be writable (non-root user, read-only rootfs).
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_file = os.path.join(tmp_dir, 'gfi.png')
            plt.savefig(image_file)
            public_url = upload_blob(bucket_name, image_file, image_suffix, public_url=True)
            logging.info('using image url {}'.format(public_url))

            html_file = os.path.join(tmp_dir, 'gfi.html')
            with open(html_file, 'w') as f:
                f.write('<html><head></head><body><h1>Global Feature Importance</h1>\n<img src="{}" width="97%"/></body></html>'.format(public_url))
            upload_blob(bucket_name, html_file, html_suffix)

        html_source = 'gs://{}/{}'.format(bucket_name, html_suffix)
        logging.info('metadata html source: {}'.format(html_source))

        metadata = {
            'outputs': [
                {
                    'type': 'web-app',
                    'storage': 'gcs',
                    'source': html_source
                }]}
        logging.info('using metadata dict {}'.format(json.dumps(metadata)))
        logging.info('using metadata ui path: {}'.format(mlpipeline_ui_metadata_path))
        with open(mlpipeline_ui_metadata_path, 'w') as mlpipeline_ui_metadata_file:
            mlpipeline_ui_metadata_file.write(json.dumps(metadata))

    logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable
    # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
    # in that case, instead of requiring endpoint to be specified.
    if api_endpoint:
        client_options = ClientOptions(api_endpoint=api_endpoint)
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
                                     client_options=client_options)
    else:
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region)

    # Only the feature list is needed here; the model object itself is not.
    (_, feat_list) = get_model_details(client, model_display_name)

    evals = list(client.list_model_evaluations(model_display_name=model_display_name))
    # Human-readable dump of the evaluations, kept from the original step.
    with open('temp_oput_regression', "w") as f:
        f.write('Model evals:\n{}'.format(evals))
    pstring = pickle.dumps(evals)

    # write to eval_data_path
    if eval_data_path:
        logging.info("eval_data_path: %s", eval_data_path)
        pathlib2.Path(eval_data_path).parent.mkdir(parents=True, exist_ok=True)
        pathlib2.Path(eval_data_path).write_bytes(pstring)

    feat_list_string = json.dumps(feat_list)
    return feat_list_string
if __name__ == '__main__':
    # Running this file as a script (re)generates the component's package
    # file from the function definition above.
    import kfp

    kfp.components.func_to_container_op(
        automl_eval_tables_model,
        base_image='python:3.7',
        output_component_file='tables_eval_component.yaml',
    )
================================================
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_component.yaml
================================================
name: Automl eval tables model
inputs:
- name: gcp_project_id
type: String
- name: gcp_region
type: String
- name: model_display_name
type: String
- name: bucket_name
type: String
- name: gcs_path
type: String
- name: api_endpoint
type: String
optional: true
outputs:
- name: eval_data
type: evals
- name: mlpipeline_ui_metadata
type: UI_metadata
- name: feat_list
type: String
implementation:
container:
image: python:3.7
command:
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
# Local annotation marker used in the signature of the component function
# below, so the embedded script does not need to import kfp at run time.
class OutputPath:
    '''When creating component from function, OutputPath should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''
    def __init__(self, type=None):
        # `type` is the artifact type name passed in the annotation,
        # e.g. OutputPath('evals'); it is only stored, never interpreted here.
        self.type = type
from typing import NamedTuple
def automl_eval_tables_model(
gcp_project_id: str,
gcp_region: str,
model_display_name: str,
bucket_name: str,
gcs_path: str,
eval_data_path: OutputPath('evals'),
mlpipeline_ui_metadata_path: OutputPath('UI_metadata'),
api_endpoint: str = None,
) -> NamedTuple('Outputs', [
('feat_list', str)]):
import subprocess
import sys
# we could build a base image that includes these libraries if we don't want to do
# the dynamic installation when the step runs.
subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
'--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
'--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
subprocess.run([sys.executable, '-m', 'pip', 'install',
'matplotlib', 'pathlib2', 'google-cloud-storage',
'--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
import google
import json
import logging
import pickle
import pathlib2
from google.api_core.client_options import ClientOptions
from google.api_core import exceptions
from google.cloud import automl_v1beta1 as automl
from google.cloud.automl_v1beta1 import enums
from google.cloud import storage
def upload_blob(bucket_name, source_file_name, destination_blob_name,
public_url=False):
"""Uploads a file to the bucket."""
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
logging.info("File {} uploaded to {}.".format(
source_file_name, destination_blob_name))
if public_url:
blob.make_public()
logging.info("Blob {} is publicly accessible at {}".format(
blob.name, blob.public_url))
return blob.public_url
def get_model_details(client, model_display_name):
  """Look up a model, log a summary of it, and render its feature-importance UI.

  Args:
    client: Tables client used to fetch the model.
    model_display_name: Display name of the model to look up.

  Returns:
    A (model, feat_list) tuple, where feat_list holds
    (feature_importance, column_display_name) tuples sorted by descending
    importance. Returns (None, None) when the lookup raises NotFound.
  """
  # A single lookup is enough; the model fetched here is reused below.
  try:
    model = client.get_model(model_display_name=model_display_name)
  except exceptions.NotFound:
    logging.info("Model %s not found." % model_display_name)
    return (None, None)

  # Retrieve deployment state.
  if model.deployment_state == enums.Model.DeploymentState.DEPLOYED:
    deployment_state = "deployed"
  else:
    deployment_state = "undeployed"

  # get features of top global importance
  feat_list = sorted(
      ((column.feature_importance, column.column_display_name)
       for column in model.tables_model_metadata.tables_model_column_info),
      reverse=True)

  # Log some information about the model
  logging.info("Model name: {}".format(model.name))
  logging.info("Model id: {}".format(model.name.split("/")[-1]))
  logging.info("Model display name: {}".format(model.display_name))
  logging.info("Features of top importance:")
  # Slicing already copes with lists shorter than ten entries.
  for feat in feat_list[:10]:
    logging.info(feat)
  logging.info("Model create time:")
  logging.info("\tseconds: {}".format(model.create_time.seconds))
  logging.info("\tnanos: {}".format(model.create_time.nanos))
  logging.info("Model deployment state: {}".format(deployment_state))

  generate_fi_ui(feat_list)
  return (model, feat_list)
def generate_fi_ui(feat_list):
import matplotlib.pyplot as plt
image_suffix = '{}/gfi.png'.format(gcs_path)
res = list(zip(*feat_list))
x = list(res[0])
y = list(res[1])
y_pos = list(range(len(y)))
plt.figure(figsize=(10, 6))
plt.barh(y_pos, x, alpha=0.5)
plt.yticks(y_pos, y)
plt.savefig('/gfi.png')
public_url = upload_blob(bucket_name, '/gfi.png', image_suffix, public_url=True)
logging.info('using image url {}'.format(public_url))
html_suffix = '{}/gfi.html'.format(gcs_path)
with open('/gfi.html', 'w') as f:
f.write('<html><head></head><body><h1>Global Feature Importance</h1>\n<img src="{}" width="97%"/></body></html>'.format(public_url))
upload_blob(bucket_name, '/gfi.html', html_suffix)
html_source = 'gs://{}/{}'.format(bucket_name, html_suffix)
logging.info('metadata html source: {}'.format(html_source))
metadata = {
'outputs' : [
{
'type': 'web-app',
'storage': 'gcs',
'source': html_source
}]}
logging.info('using metadata dict {}'.format(json.dumps(metadata)))
logging.info('using metadata ui path: {}'.format(mlpipeline_ui_metadata_path))
with open(mlpipeline_ui_metadata_path, 'w') as mlpipeline_ui_metadata_file:
mlpipeline_ui_metadata_file.write(json.dumps(metadata))
logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable
# TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
# in that case, instead of requiring endpoint to be specified.
if api_endpoint:
client_options = ClientOptions(api_endpoint=api_endpoint)
client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
client_options=client_options)
else:
client = automl.TablesClient(project=gcp_project_id, region=gcp_region)
(model, feat_list) = get_model_details(client, model_display_name)
evals = list(client.list_model_evaluations(model_display_name=model_display_name))
with open('temp_oput_regression', "w") as f:
f.write('Model evals:\n{}'.format(evals))
pstring = pickle.dumps(evals)
# write to eval_data_path
if eval_data_path:
logging.info("eval_data_path: %s", eval_data_path)
try:
pathlib2.Path(eval_data_path).parent.mkdir(parents=True)
except FileExistsError:
pass
pathlib2.Path(eval_data_path).write_bytes(pstring)
feat_list_string = json.dumps(feat_list)
return feat_list_string
def _serialize_str(str_value: str) -> str:
if not isinstance(str_value, str):
raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
return str_value
import argparse
_parser = argparse.ArgumentParser(prog='Automl eval tables model', description='')
_parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model-display-name", dest="model_display_name", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--bucket-name", dest="bucket_name", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--gcs-path", dest="gcs_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--api-endpoint", dest="api_endpoint", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--eval-data", dest="eval_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--mlpipeline-ui-metadata", dest="mlpipeline_ui_metadata_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = automl_eval_tables_model(**_parsed_args)
if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
_outputs = [_outputs]
_output_serializers = [
_serialize_str,
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --gcp-project-id
- inputValue: gcp_project_id
- --gcp-region
- inputValue: gcp_region
- --model-display-name
- inputValue: model_display_name
- --bucket-name
- inputValue: bucket_name
- --gcs-path
- inputValue: gcs_path
- if:
cond:
isPresent: api_endpoint
then:
- --api-endpoint
- inputValue: api_endpoint
- --eval-data
- outputPath: eval_data
- --mlpipeline-ui-metadata
- outputPath: mlpipeline_ui_metadata
- '----output-paths'
- outputPath: feat_list
================================================
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_metrics_component.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import NamedTuple
from kfp.components import InputPath, OutputPath
# An example of how the model eval info could be used to make decisions about whether or not
# to deploy the model.
def automl_eval_metrics(
    eval_data_path: InputPath('evals'),
    mlpipeline_ui_metadata_path: OutputPath('UI_metadata'),
    mlpipeline_metrics_path: OutputPath('UI_metrics'),
    # Example for a classification model: thresholds = '{"au_prc": 0.9}'
    thresholds: str = '{"mean_absolute_error": 460}',
    confidence_threshold: float = 0.5  # for classification
) -> NamedTuple('Outputs', [('deploy', str)]):
  """Compare model evaluation metrics against thresholds to gate deployment.

  An example of how the model eval info could be used to make decisions about
  whether or not to deploy the model.

  Args:
    eval_data_path: Path of the file holding the pickled list of model
      evaluations.
    mlpipeline_ui_metadata_path: Path to which a markdown summary of the
      metrics is written (as JSON UI metadata).
    mlpipeline_metrics_path: Path to which the mean absolute error is written
      as a JSON metric; only written for regression models.
    thresholds: JSON object mapping metric names to threshold values. Error
      metrics (and log_loss for classification) must not exceed their
      threshold; every other metric must not fall below it.
    confidence_threshold: For classification, recall/precision/f1_score are
      taken from the first confidence-metrics entry whose confidence
      threshold is >= this value.

  Returns:
    A one-element tuple: 'deploy' if all thresholds are satisfied (or if the
    evaluation could not be processed), otherwise 'False'.
  """
  import subprocess
  import sys
  # The dependencies are installed when the step runs rather than being baked
  # into a custom base image.
  subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
                  '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
  subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
                  'google-cloud-storage',
                  '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)

  import google
  import json
  import logging
  import pickle

  logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable

  thresholds_dict = json.loads(thresholds)
  logging.info('thresholds dict: {}'.format(thresholds_dict))

  def regression_threshold_check(eval_info):
    """Return (flag, metrics) for a regression model; flag is 'deploy' or 'False'."""
    # Error metrics, for which a smaller value is better. The log-error
    # variant belongs here too; without it its threshold would be enforced
    # as a minimum and the deploy decision would be inverted.
    lower_is_better = (
        'root_mean_squared_error',
        'mean_absolute_error',
        'mean_absolute_percentage_error',
        'root_mean_squared_log_error',
    )
    eresults = {}
    rmetrics = eval_info[1].regression_evaluation_metrics
    logging.info('got regression eval {}'.format(eval_info[1]))
    eresults['root_mean_squared_error'] = rmetrics.root_mean_squared_error
    eresults['mean_absolute_error'] = rmetrics.mean_absolute_error
    eresults['r_squared'] = rmetrics.r_squared
    eresults['mean_absolute_percentage_error'] = rmetrics.mean_absolute_percentage_error
    eresults['root_mean_squared_log_error'] = rmetrics.root_mean_squared_log_error
    for k, v in thresholds_dict.items():
      logging.info('k {}, v {}'.format(k, v))
      if k in lower_is_better:
        # The threshold is a maximum for error metrics.
        if eresults[k] > v:
          logging.info('{} > {}; returning False'.format(
              eresults[k], v))
          return ('False', eresults)
      elif eresults[k] < v:
        # The threshold is a minimum for everything else (e.g. r_squared).
        logging.info('{} < {}; returning False'.format(
            eresults[k], v))
        return ('False', eresults)
    return ('deploy', eresults)

  def classif_threshold_check(eval_info):
    """Return (flag, metrics) for a classification model; flag is 'deploy' or 'False'."""
    eresults = {}
    example_count = eval_info[0].evaluated_example_count
    print('Looking for example_count {}'.format(example_count))
    # Use the first later entry whose example count matches the first one.
    for e in eval_info[1:]:  # we know we don't want the first elt
      if e.evaluated_example_count == example_count:
        eresults['au_prc'] = e.classification_evaluation_metrics.au_prc
        eresults['au_roc'] = e.classification_evaluation_metrics.au_roc
        eresults['log_loss'] = e.classification_evaluation_metrics.log_loss
        for i in e.classification_evaluation_metrics.confidence_metrics_entry:
          if i.confidence_threshold >= confidence_threshold:
            eresults['recall'] = i.recall
            eresults['precision'] = i.precision
            eresults['f1_score'] = i.f1_score
            break
        break
    logging.info('eresults: {}'.format(eresults))
    for k, v in thresholds_dict.items():
      logging.info('k {}, v {}'.format(k, v))
      if k == 'log_loss':
        # log_loss is the only classification metric where lower is better.
        if eresults[k] > v:
          logging.info('{} > {}; returning False'.format(
              eresults[k], v))
          return ('False', eresults)
      else:
        if eresults[k] < v:
          logging.info('{} < {}; returning False'.format(
              eresults[k], v))
          return ('False', eresults)
    return ('deploy', eresults)

  with open(eval_data_path, 'rb') as f:
    logging.info('successfully opened eval_data_path {}'.format(eval_data_path))
    try:
      # NOTE(review): pickle.loads can execute arbitrary code; this input must
      # only ever be the artifact written by the trusted upstream eval step.
      eval_info = pickle.loads(f.read())

      classif = False
      regression = False
      # TODO: what's the right way to figure out the model type?
      if eval_info[1].regression_evaluation_metrics and eval_info[1].regression_evaluation_metrics.root_mean_squared_error:
        regression = True
        logging.info('found regression metrics {}'.format(
            eval_info[1].regression_evaluation_metrics))
      elif eval_info[1].classification_evaluation_metrics and eval_info[1].classification_evaluation_metrics.au_prc:
        classif = True
        logging.info('found classification metrics {}'.format(
            eval_info[1].classification_evaluation_metrics))

      if regression and thresholds_dict:
        res, eresults = regression_threshold_check(eval_info)
        # logging.info('eresults: {}'.format(eresults))
        metadata = {
            'outputs': [
                {
                    'storage': 'inline',
                    'source': '# Regression metrics:\n\n```{}```\n'.format(eresults),
                    'type': 'markdown',
                }]}
        metrics = {
            'metrics': [{
                'name': 'mae',
                'numberValue': eresults['mean_absolute_error'],
                'format': "RAW",
            }]
        }
        logging.info('using metadata dict {}'.format(json.dumps(metadata)))
        logging.info('using metadata ui path: {}'.format(mlpipeline_ui_metadata_path))
        with open(mlpipeline_ui_metadata_path, 'w') as mlpipeline_ui_metadata_file:
          mlpipeline_ui_metadata_file.write(json.dumps(metadata))
        logging.info('using metrics path: {}'.format(mlpipeline_metrics_path))
        with open(mlpipeline_metrics_path, 'w') as mlpipeline_metrics_file:
          mlpipeline_metrics_file.write(json.dumps(metrics))
        logging.info('deploy flag: {}'.format(res))
        return (res,)

      if classif and thresholds_dict:
        res, eresults = classif_threshold_check(eval_info)
        # logging.info('eresults: {}'.format(eresults))
        metadata = {
            'outputs': [
                {
                    'storage': 'inline',
                    'source': '# classification metrics for confidence threshold {}:\n\n```{}```\n'.format(
                        confidence_threshold, eresults),
                    'type': 'markdown',
                }]}
        logging.info('using metadata dict {}'.format(json.dumps(metadata)))
        logging.info('using metadata ui path: {}'.format(mlpipeline_ui_metadata_path))
        with open(mlpipeline_ui_metadata_path, 'w') as mlpipeline_ui_metadata_file:
          mlpipeline_ui_metadata_file.write(json.dumps(metadata))
        logging.info('deploy flag: {}'.format(res))
        return (res,)
      # Neither model type detected, or no thresholds given: nothing to gate on.
      return ('deploy',)
    except Exception as e:
      logging.warning(e)
      # If can't reconstruct the eval, or a threshold names a metric that was
      # not collected, return 'deploy' as a signal to deploy.
      # TODO: is this the right default?
      return ('deploy',)
if __name__ == '__main__':
  # Running this module as a script (re)builds the reusable component spec
  # for automl_eval_metrics; kfp is imported here because it is only needed
  # for this build step.
  import kfp

  kfp.components.func_to_container_op(
      automl_eval_metrics,
      base_image='python:3.7',
      output_component_file='tables_eval_metrics_component.yaml')
================================================
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_metrics_component.yaml
================================================
name: Automl eval metrics
inputs:
- name: eval_data
type: evals
- name: thresholds
type: String
default: '{"mean_absolute_error": 460}'
optional: true
- name: confidence_threshold
type: Float
default: '0.5'
optional: true
outputs:
- name: mlpipeline_ui_metadata
type: UI_metadata
- name: mlpipeline_metrics
type: UI_metrics
- name: deploy
type: String
implementation:
container:
image: python:3.7
command:
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def automl_eval_metrics(
eval_data_path ,
mlpipeline_ui_metadata_path ,
mlpipeline_metrics_path ,
# thresholds: str = '{"au_prc": 0.9}',
thresholds = '{"mean_absolute_error": 460}',
confidence_threshold = 0.5 # for classification
) :
import subprocess
import sys
subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
'--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
'google-cloud-storage',
'--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
import google
import json
import logging
import pickle
logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable
thresholds_dict = json.loads(thresholds)
logging.info('thresholds dict: {}'.format(thresholds_dict))
def regression_threshold_check(eval_info):
  """Check regression metrics against thresholds_dict from the enclosing scope.

  Args:
    eval_info: Sequence of model evaluations; the regression metrics are read
      from the entry at index 1.

  Returns:
    A (flag, metrics) tuple where flag is 'deploy' if every threshold is
    satisfied and 'False' otherwise, and metrics is the dict of values read.
  """
  # Error metrics, for which a smaller value is better. The log-error variant
  # belongs here too; without it its threshold would be enforced as a minimum
  # and the deploy decision would be inverted.
  lower_is_better = (
      'root_mean_squared_error',
      'mean_absolute_error',
      'mean_absolute_percentage_error',
      'root_mean_squared_log_error',
  )
  eresults = {}
  rmetrics = eval_info[1].regression_evaluation_metrics
  logging.info('got regression eval {}'.format(eval_info[1]))
  eresults['root_mean_squared_error'] = rmetrics.root_mean_squared_error
  eresults['mean_absolute_error'] = rmetrics.mean_absolute_error
  eresults['r_squared'] = rmetrics.r_squared
  eresults['mean_absolute_percentage_error'] = rmetrics.mean_absolute_percentage_error
  eresults['root_mean_squared_log_error'] = rmetrics.root_mean_squared_log_error
  for k, v in thresholds_dict.items():
    logging.info('k {}, v {}'.format(k, v))
    if k in lower_is_better:
      # The threshold is a maximum for error metrics.
      if eresults[k] > v:
        logging.info('{} > {}; returning False'.format(
            eresults[k], v))
        return ('False', eresults)
    elif eresults[k] < v:
      # The threshold is a minimum for everything else (e.g. r_squared).
      logging.info('{} < {}; returning False'.format(
          eresults[k], v))
      return ('False', eresults)
  return ('deploy', eresults)
def classif_threshold_check(eval_info):
eresults = {}
example_count = eval_info[0].evaluated_example_count
print('Looking for example_count {}'.format(example_count))
for e in eval_info[1:]: # we know we don't want the first elt
if e.evaluated_example_count == example_count:
eresults['au_prc'] = e.classification_evaluation_metrics.au_prc
eresults['au_roc'] = e.classification_evaluation_metrics.au_roc
eresults['log_loss'] = e.classification_evaluation_metrics.log_loss
for i in e.classification_evaluation_metrics.confidence_metrics_entry:
if i.confidence_threshold >= confidence_threshold:
eresults['recall'] = i.recall
eresults['precision'] = i.precision
eresults['f1_score'] = i.f1_score
break
break
logging.info('eresults: {}'.format(eresults))
for k, v in thresholds_dict.items():
logging.info('k {}, v {}'.format(k, v))
if k == 'log_loss':
if eresults[k] > v:
logging.info('{} > {}; returning False'.format(
eresults[k], v))
return ('False', eresults)
else:
if eresults[k] < v:
logging.info('{} < {}; returning False'.format(
eresults[k], v))
return ('False', eresults)
return ('deploy', eresults)
with open(eval_data_path, 'rb') as f:
logging.info('successfully opened eval_data_path {}'.format(eval_data_path))
try:
eval_info = pickle.loads(f.read())
classif = False
regression = False
# TODO: what's the right way to figure out the model type?
if eval_info[1].regression_evaluation_metrics and eval_info[1].regression_evaluation_metrics.root_mean_squared_error:
regression=True
logging.info('found regression metrics {}'.format(
eval_info[1].regression_evaluation_metrics))
elif eval_info[1].classification_evaluation_metrics and eval_info[1].classification_evaluation_metrics.au_prc:
classif = True
logging.info('found classification metrics {}'.format(
eval_info[1].classification_evaluation_metrics))
if regression and thresholds_dict:
res, eresults = regression_threshold_check(eval_info)
# logging.info('eresults: {}'.format(eresults))
metadata = {
'outputs' : [
{
'storage': 'inline',
'source': '# Regression metrics:\n\n```{}```\n'.format(eresults),
'type': 'markdown',
}]}
metrics = {
'metrics': [{
'name': 'mae',
'numberValue': eresults['mean_absolute_error'],
'format': "RAW",
}]
}
logging.info('using metadata dict {}'.format(json.dumps(metadata)))
logging.info('using metadata ui path: {}'.format(mlpipeline_ui_metadata_path))
with open(mlpipeline_ui_metadata_path, 'w') as mlpipeline_ui_metadata_file:
mlpipeline_ui_metadata_file.write(json.dumps(metadata))
logging.info('using metrics path: {}'.format(mlpipeline_metrics_path))
with open(mlpipeline_metrics_path, 'w') as mlpipeline_metrics_file:
mlpipeline_metrics_file.write(json.dumps(metrics))
logging.info('deploy flag: {}'.format(res))
return (res,)
if classif and thresholds_dict:
res, eresults = classif_threshold_check(eval_info)
# logging.info('eresults: {}'.format(eresults))
metadata = {
'outputs' : [
{
'storage': 'inline',
'source': '# classification metrics for confidence threshold {}:\n\n```{}```\n'.format(
confidence_threshold, eresults),
'type': 'markdown',
}]}
logging.info('using metadata dict {}'.format(json.dumps(metadata)))
logging.info('using metadata ui path: {}'.format(mlpipeline_ui_metadata_path))
with open(mlpipeline_ui_metadata_path, 'w') as mlpipeline_ui_metadata_file:
mlpipeline_ui_metadata_file.write(json.dumps(metadata))
logging.info('deploy flag: {}'.format(res))
return (res,)
return ('deploy',)
except Exception as e:
logging.warning(e)
# If can't reconstruct the eval, or don't have thresholds defined,
# return True as a signal to deploy.
# TODO: is this the right default?
return ('deploy',)
def _serialize_str(str_value: str) -> str:
if not isinstance(str_value, str):
raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
return str_value
import argparse
_parser = argparse.ArgumentParser(prog='Automl eval metrics', description='')
_parser.add_argument("--eval-data", dest="eval_data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--thresholds", dest="thresholds", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--confidence-threshold", dest="confidence_threshold", type=float, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--mlpipeline-ui-metadata", dest="mlpipeline_ui_metadata_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--mlpipeline-metrics", dest="mlpipeline_metrics_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = automl_eval_metrics(**_parsed_args)
_output_serializers = [
_serialize_str,
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --eval-data
- inputPath: eval_data
- if:
cond:
isPresent: thresholds
then:
- --thresholds
- inputValue: thresholds
- if:
cond:
isPresent: confidence_threshold
then:
- --confidence-threshold
- inputValue: confidence_threshold
- --mlpipeline-ui-metadata
- outputPath: mlpipeline_ui_metadata
- --mlpipeline-metrics
- outputPath: mlpipeline_metrics
- '----output-paths'
- outputPath: deploy
================================================
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/convert_oss.py
================================================
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# tested with TF1.14
import sys
import tensorflow as tf
from absl import app
from absl import flags
from tensorflow.core.protobuf import saved_model_pb2
from tensorflow.python.summary import summary
FLAGS = flags.FLAGS
flags.DEFINE_string('saved_model', '', 'The location of the saved_model.pb to visualize.')
flags.DEFINE_string('output_dir', '',
'The location for the Tensorboard log to begin visualization from.')
def import_to_tensorboard(saved_model, output_dir):
  """View an imported saved_model.pb as a graph in Tensorboard.

  Args:
    saved_model: The location of the saved_model.pb to visualize.
    output_dir: The location for the Tensorboard log to begin visualization from.

  Usage:
    Call this function with your model location and desired log directory.
    Launch Tensorboard by pointing it to the log directory.
    View your imported `.pb` model as a graph.

  Exits the process with status 1 unless the SavedModel contains exactly one
  meta graph.
  """
  with open(saved_model, "rb") as f:
    sm = saved_model_pb2.SavedModel()
    sm.ParseFromString(f.read())

  num_graphs = len(sm.meta_graphs)
  if num_graphs != 1:
    # Covers the empty case as well as the ambiguous multi-graph case.
    print('Expected exactly one graph but found {}. Not sure which to write'.format(
        num_graphs))
    sys.exit(1)
  graph_def = sm.meta_graphs[0].graph_def

  pb_visual_writer = summary.FileWriter(output_dir)
  try:
    pb_visual_writer.add_graph(None, graph_def=graph_def)
  finally:
    # Closing flushes the event file, so the graph is on disk before the
    # user is told to launch Tensorboard.
    pb_visual_writer.close()
  print("Model Imported. Visualize by running: "
        "tensorboard --logdir={}".format(output_dir))
def main(argv):
  """Entry point handed to app.run: write the flagged SavedModel graph for Tensorboard."""
  del argv  # Only the parsed FLAGS values are used.
  import_to_tensorboard(
      saved_model=FLAGS.saved_model,
      output_dir=FLAGS.output_dir)
# Script entry point: hand control to absl, which calls main().
if __name__ == '__main__':
  app.run(main)
================================================
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/exported_model_deploy.py
================================================
# Copyright 2020 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import logging
import subprocess
def main():
  """Render the serving manifest from its template and create it with kubectl.

  Command-line arguments:
    --model_name: (required) Model name substituted for MODEL_NAME in the
      template; underscores are replaced with dashes first.
    --image_name: (required) Container image substituted for IMAGE_NAME.
    --namespace: Namespace substituted for NAMESPACE (default: 'default').

  The rendered manifest is written next to this script as model_serve.yaml
  and then passed to `kubectl create -f`.
  """
  parser = argparse.ArgumentParser(description='Serving webapp')
  parser.add_argument(
      '--model_name',
      required=True)
  parser.add_argument(
      '--image_name',
      required=True)
  parser.add_argument(
      '--namespace',
      default='default')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  logging.info('Generating training template.')

  script_dir = os.path.dirname(os.path.realpath(__file__))
  template_file = os.path.join(script_dir, 'model_serve_template.yaml')
  target_file = os.path.join(script_dir, 'model_serve.yaml')
  # Presumably because Kubernetes resource names cannot contain underscores.
  mname = args.model_name.replace('_', '-')
  # Honor --namespace instead of a hard-coded value; its default keeps the
  # previous behavior for callers that do not pass the flag.
  namespace = args.namespace
  logging.info("using model name: {}, image {}, and namespace: {}".format(
      mname, args.image_name, namespace))

  with open(template_file, 'r') as f:
    data = f.read()
  changed = data.replace('MODEL_NAME', mname).replace(
      'IMAGE_NAME', args.image_name).replace('NAMESPACE', namespace)
  with open(target_file, "w") as target:
    target.write(changed)

  logging.info('deploying...')
  # Apply the manifest that was just written rather than a fixed path, so the
  # script also works when it is not installed under /ml.
  returncode = subprocess.call(['kubectl', 'create', '-f', target_file])
  if returncode != 0:
    # Best-effort, as before (e.g. the resources may already exist), but the
    # failure is no longer silent.
    logging.error('kubectl create exited with status %s', returncode)
  # kubectl -n default port-forward svc/<mname> 8080:80
  # curl -X POST --data @./instances.json http://localhost:8080/predict
# Script entry point: render the manifest and deploy when run directly.
if __name__ == "__main__":
  main()
================================================
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/instances.json
================================================
{
"instances": [
{
"bike_id": "6179",
"day_of_week": "6",
"end_latitude": 51.50379168,
"end_longitude": -0.11282408,
"end_station_id": "154",
"euclidean": 2513.254047872678,
"loc_cross": "POINT(-0.08 51.52)POINT(-0.11 51.5)",
"max": 56.8,
"min": 50.9,
"prcp": 0,
"ts": 1445624280,
"start_latitude": 51.51615461,
"start_longitude": -0.082422399,
"start_station_id": "217",
"temp": 54,
"dewp": 44
},
{
"bike_id": "5373",
"day_of_week": "3",
"end_latitude": 51.52059681,
"end_longitude": -0.116688468,
"end_station_id": "68",
"euclidean": 1181.215448450556,
"loc_cross": "POINT(-0.13 51.53)POINT(-0.12 51.52)",
"max": 56.7,
"min": 45.9,
"prcp": 0,
"ts": 1494317220,
"start_latitude": 51.52683806,
"start_longitude": -0.130504336,
"start_station_id": "214",
"temp": 50.5,
"dewp": 37.1
},
{
"bike_id": "5373",
"day_of_week": "3",
"end_latitude": 51.52059681,
"end_longitude": -0.116688468,
"end_station_id": "68",
"euclidean": 3589.5146210024977,
"loc_cross": "POINT(-0.07 51.52)POINT(-0.12 51.52)",
"max": 44.6,
"min": 34.0,
"prcp": 0,
"ts": 1480407420,
"start_latitude": 51.52388,
"start_longitude": -0.065076,
"start_station_id": "445",
"temp": 38.2,
"dewp": 28.6
}
]
}
================================================
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/model_serve_template.yaml
================================================
---
# Service in front of the model-serving pods. MODEL_NAME, NAMESPACE and
# IMAGE_NAME are placeholders that are replaced by plain text substitution
# before this manifest is handed to kubectl.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: MODEL_NAME
  name: MODEL_NAME
  namespace: NAMESPACE
spec:
  ports:
  - name: model-serving
    port: 80
    targetPort: "http-server"
  selector:
    app: MODEL_NAME
  type: ClusterIP
---
# apps/v1 replaces extensions/v1beta1, which is no longer served for
# Deployments as of Kubernetes 1.16. apps/v1 requires an explicit selector
# that matches the pod template labels.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: MODEL_NAME
  name: MODEL_NAME-dep
  namespace: NAMESPACE
spec:
  replicas: 2
  selector:
    matchLabels:
      app: MODEL_NAME
  template:
    metadata:
      labels:
        app: MODEL_NAME
        version: v1
    spec:
      containers:
      - name: MODEL_NAME
        image: IMAGE_NAME
        imagePullPolicy: Always
        livenessProbe:
          initialDelaySeconds: 30
          periodSeconds: 30
          tcpSocket:
            port: 8080
        ports:
        - name: http-server
          containerPort: 8080
        resources:
          limits:
            cpu: "4"
            memory: 4Gi
          requests:
            cpu: "1"
            memory: 1Gi
================================================
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/tables_deploy_component.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import NamedTuple
def automl_deploy_tables_model(
    gcp_project_id: str,
    gcp_region: str,
    model_display_name: str,
    api_endpoint: str = None,
) -> NamedTuple('Outputs', [('model_display_name', str), ('status', str)]):
  """Deploy an AutoML Tables model unless it is already deployed.

  Args:
    gcp_project_id: Project passed to the Tables client.
    gcp_region: Region passed to the Tables client.
    model_display_name: Display name of the model to deploy.
    api_endpoint: Optional API endpoint; when given it is passed to the
      client through ClientOptions.

  Returns:
    A (model_display_name, status) tuple. status is 'deployed' when the model
    was already deployed or the deployment finished, 'not_found' when the
    lookup raised NotFound, and 'undeployed' on any other error.
  """
  import subprocess
  import sys
  # Dependencies are installed when the step runs.
  pip_env = {'PIP_DISABLE_PIP_VERSION_CHECK': '1'}
  subprocess.run(
      [sys.executable, '-m', 'pip', 'install',
       'googleapis-common-protos==1.6.0', '--no-warn-script-location'],
      env=pip_env, check=True)
  subprocess.run(
      [sys.executable, '-m', 'pip', 'install',
       'google-cloud-automl==0.9.0', '--quiet', '--no-warn-script-location'],
      env=pip_env, check=True)

  import google
  import logging
  from google.api_core.client_options import ClientOptions
  from google.api_core import exceptions
  from google.cloud import automl_v1beta1 as automl
  from google.cloud.automl_v1beta1 import enums

  logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable

  # TODO: we could instead check for region 'eu' and use the
  # 'eu-automl.googleapis.com:443' endpoint in that case, instead of requiring
  # the endpoint to be specified.
  client_kwargs = {'project': gcp_project_id, 'region': gcp_region}
  if api_endpoint:
    client_kwargs['client_options'] = ClientOptions(api_endpoint=api_endpoint)
  client = automl.TablesClient(**client_kwargs)

  try:
    model = client.get_model(model_display_name=model_display_name)
    if model.deployment_state != enums.Model.DeploymentState.DEPLOYED:
      logging.info('Deploying model {}'.format(model_display_name))
      operation = client.deploy_model(model_display_name=model_display_name)
      # synchronous wait
      logging.info("Model deployed. {}".format(operation.result()))
    else:
      logging.info('Model {} already deployed'.format(model_display_name))
    # Reached only if neither the lookup nor the deployment raised.
    status = 'deployed'
  except exceptions.NotFound as not_found_err:
    logging.warning(not_found_err)
    status = 'not_found'
  except Exception as deploy_err:
    logging.warning(deploy_err)
    status = 'undeployed'

  logging.info('Model status: {}'.format(status))
  return (model_display_name, status)
if __name__ == '__main__':
    # Running this file directly (re)writes the component spec
    # tables_deploy_component.yaml from the function above.
    from kfp import components

    components.func_to_container_op(
        automl_deploy_tables_model,
        base_image='python:3.7',
        output_component_file='tables_deploy_component.yaml',
    )
================================================
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/tables_deploy_component.yaml
================================================
# Kubeflow Pipelines component spec for deploying an AutoML Tables model.
# The embedded program mirrors automl_deploy_tables_model in
# tables_deploy_component.py, whose __main__ block writes this file through
# kfp.components.func_to_container_op; prefer regenerating over hand-editing.
name: Automl deploy tables model
inputs:
- name: gcp_project_id
  type: String
- name: gcp_region
  type: String
- name: model_display_name
  type: String
# Optional endpoint override for the AutoML client.
- name: api_endpoint
  type: String
  optional: true
outputs:
- name: model_display_name
  type: String
# One of 'deployed', 'not_found' or 'undeployed' (see the embedded function).
- name: status
  type: String
implementation:
  container:
    image: python:3.7
    # The whole program is passed inline to `python3 -u -c`.
    command:
    - python3
    - -u
    - -c
    - |
      from typing import NamedTuple
      def automl_deploy_tables_model(
          gcp_project_id: str,
          gcp_region: str,
          model_display_name: str,
          api_endpoint: str = None,
      ) -> NamedTuple('Outputs', [('model_display_name', str), ('status', str)]):
          import subprocess
          import sys
          subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0', '--no-warn-script-location'],
              env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
          subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0', '--quiet', '--no-warn-script-location'],
              env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
          import google
          import logging
          from google.api_core.client_options import ClientOptions
          from google.api_core import exceptions
          from google.cloud import automl_v1beta1 as automl
          from google.cloud.automl_v1beta1 import enums
          logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable
          # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
          # in that case, instead of requiring endpoint to be specified.
          if api_endpoint:
              client_options = ClientOptions(api_endpoint=api_endpoint)
              client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
                  client_options=client_options)
          else:
              client = automl.TablesClient(project=gcp_project_id, region=gcp_region)
          try:
              model = client.get_model(model_display_name=model_display_name)
              if model.deployment_state == enums.Model.DeploymentState.DEPLOYED:
                  status = 'deployed'
                  logging.info('Model {} already deployed'.format(model_display_name))
              else:
                  logging.info('Deploying model {}'.format(model_display_name))
                  response = client.deploy_model(model_display_name=model_display_name)
                  # synchronous wait
                  logging.info("Model deployed. {}".format(response.result()))
                  status = 'deployed'
          except exceptions.NotFound as e:
              logging.warning(e)
              status = 'not_found'
          except Exception as e:
              logging.warning(e)
              status = 'undeployed'
          logging.info('Model status: {}'.format(status))
          return (model_display_name, status)
      def _serialize_str(str_value: str) -> str:
          if not isinstance(str_value, str):
              raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
          return str_value
      import argparse
      _parser = argparse.ArgumentParser(prog='Automl deploy tables model', description='')
      _parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model-display-name", dest="model_display_name", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--api-endpoint", dest="api_endpoint", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=2)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])
      _outputs = automl_deploy_tables_model(**_parsed_args)
      if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
          _outputs = [_outputs]
      _output_serializers = [
          _serialize_str,
          _serialize_str,
      ]
      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --gcp-project-id
    - inputValue: gcp_project_id
    - --gcp-region
    - inputValue: gcp_region
    - --model-display-name
    - inputValue: model_display_name
    # The optional flag is only added when api_endpoint is present.
    - if:
        cond:
          isPresent: api_endpoint
        then:
        - --api-endpoint
        - inputValue: api_endpoint
    # Two output file paths, in the order the outputs are declared above.
    - '----output-paths'
    - outputPath: model_display_name
    - outputPath: status
================================================
FILE: ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_component.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import NamedTuple
def automl_import_data_for_tables(
    # dataset_path,
    path: str,
    gcp_project_id: str,
    gcp_region: str,
    dataset_display_name: str,
    api_endpoint: str = None,
) -> NamedTuple('Outputs', [('dataset_display_name', str)]):
    """Import data into an AutoML Tables dataset and log its column specs.

    Args:
      path: Data source. A value starting with 'bq' is passed as a BigQuery
        input URI; anything else is split on commas and passed as a list of
        Cloud Storage input URIs.
      gcp_project_id: Project passed to the AutoML Tables client.
      gcp_region: Region passed to the AutoML Tables client.
      dataset_display_name: Display name of the dataset to import into.
      api_endpoint: Optional API endpoint override; when empty, the client is
        created without custom client options.

    Returns:
      dataset_display_name, unchanged.
    """
    # All imports are local so that the function body is self-contained when
    # it is packaged as a component by func_to_container_op.
    import sys
    import subprocess

    # Install the pinned client libraries at run time.
    pip_install = [sys.executable, '-m', 'pip', 'install']
    pip_env = {'PIP_DISABLE_PIP_VERSION_CHECK': '1'}
    subprocess.run(
        pip_install + ['googleapis-common-protos==1.6.0', '--no-warn-script-location'],
        env=pip_env, check=True)
    subprocess.run(
        pip_install + ['google-cloud-automl==0.9.0', '--quiet', '--no-warn-script-location'],
        env=pip_env, check=True)

    import google
    import logging
    from google.api_core.client_options import ClientOptions
    from google.cloud import automl_v1beta1 as automl

    def list_column_specs(client, dataset_display_name, filter_=None):
        """Log every column spec of the dataset and return them as a list."""
        specs = client.list_column_specs(
            dataset_display_name=dataset_display_name, filter_=filter_)
        logging.info("List of column specs:")
        collected = []
        for spec in specs:
            spec_id = spec.name.split("/")[-1]
            logging.info("Column spec name: {}".format(spec.name))
            logging.info("Column spec id: {}".format(spec_id))
            logging.info("Column spec display name: {}".format(spec.display_name))
            logging.info("Column spec data type: {}".format(spec.data_type))
            collected.append(spec)
        return collected

    logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable

    # TODO: we could instead check for region 'eu' and use the
    # 'eu-automl.googleapis.com:443' endpoint in that case, instead of
    # requiring the endpoint to be specified.
    client_kwargs = {'project': gcp_project_id, 'region': gcp_region}
    if api_endpoint:
        client_kwargs['client_options'] = ClientOptions(api_endpoint=api_endpoint)
    client = automl.TablesClient(**client_kwargs)

    # Choose the import source keyword from the shape of `path`.
    if path.startswith('bq'):
        source_kwargs = {'bigquery_input_uri': path}
    else:
        # Possibly several comma-separated Google Cloud Storage URIs.
        source_kwargs = {'gcs_input_uris': path.split(",")}
    operation = client.import_data(
        dataset_display_name=dataset_display_name, **source_kwargs)

    logging.info("Processing import... This can take a while.")
    # result() blocks until the import operation completes.
    logging.info("Data imported. {}".format(operation.result()))
    logging.info("Response metadata: {}".format(operation.metadata))
    logging.info("Operation name: {}".format(operation.operation.name))

    # Log the column schema that was inferred from the imported data.
    list_column_specs(client, dataset_display_name)
    return dataset_display_name
if __name__ == '__main__':
    # Running this file directly (re)writes the component spec
    # tables_component.yaml from the function above.
    from kfp import components

    components.func_to_container_op(
        automl_import_data_for_tables,
        base_image='python:3.7',
        output_component_file='tables_component.yaml',
    )
================================================
FILE: ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_component.yaml
================================================
# Kubeflow Pipelines component spec for importing data into an AutoML Tables
# dataset. The embedded program mirrors automl_import_data_for_tables in
# tables_component.py, whose __main__ block writes this file through
# kfp.components.func_to_container_op; prefer regenerating over hand-editing.
name: Automl import data for tables
inputs:
# A value starting with 'bq' is imported from BigQuery; anything else is
# treated as comma-separated Cloud Storage URIs (see the embedded function).
- name: path
  type: String
- name: gcp_project_id
  type: String
- name: gcp_region
  type: String
- name: dataset_display_name
  type: String
# Optional endpoint override for the AutoML client.
- name: api_endpoint
  type: String
  optional: true
outputs:
- name: dataset_display_name
  type: String
implementation:
  container:
    image: python:3.7
    # The whole program is passed inline to `python3 -u -c`.
    command:
    - python3
    - -u
    - -c
    - |
      from typing import NamedTuple
      def automl_import_data_for_tables(
          # dataset_path,
          path: str,
          gcp_project_id: str,
          gcp_region: str,
          dataset_display_name: str,
          api_endpoint: str = None,
      ) -> NamedTuple('Outputs', [('dataset_display_name', str)]):
          import sys
          import subprocess
          subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
              '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
          subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0', '--quiet',
              '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
          import google
          import logging
          from google.api_core.client_options import ClientOptions
          from google.cloud import automl_v1beta1 as automl
          def list_column_specs(client,
                                dataset_display_name,
                                filter_=None):
              """List all column specs."""
              result = []
              # List all the table specs in the dataset
              response = client.list_column_specs(
                  dataset_display_name=dataset_display_name, filter_=filter_)
              logging.info("List of column specs:")
              for column_spec in response:
                  # Display the column_spec information.
                  logging.info("Column spec name: {}".format(column_spec.name))
                  logging.info("Column spec id: {}".format(column_spec.name.split("/")[-1]))
                  logging.info("Column spec display name: {}".format(column_spec.display_name))
                  logging.info("Column spec data type: {}".format(column_spec.data_type))
                  result.append(column_spec)
              return result
          logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable
          # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
          # in that case, instead of requiring endpoint to be specified.
          if api_endpoint:
              client_options = ClientOptions(api_endpoint=api_endpoint)
              client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
                  client_options=client_options)
          else:
              client = automl.TablesClient(project=gcp_project_id, region=gcp_region)
          response = None
          if path.startswith('bq'):
              response = client.import_data(
                  dataset_display_name=dataset_display_name, bigquery_input_uri=path
              )
          else:
              # Get the multiple Google Cloud Storage URIs.
              input_uris = path.split(",")
              response = client.import_data(
                  dataset_display_name=dataset_display_name,
                  gcs_input_uris=input_uris)
          logging.info("Processing import... This can take a while.")
          # synchronous check of operation status.
          logging.info("Data imported. {}".format(response.result()))
          logging.info("Response metadata: {}".format(response.metadata))
          logging.info("Operation name: {}".format(response.operation.name))
          # now list the inferred col schema
          list_column_specs(client, dataset_display_name)
          return dataset_display_name
      def _serialize_str(str_value: str) -> str:
          if not isinstance(str_value, str):
              raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
          return str_value
      import argparse
      _parser = argparse.ArgumentParser(prog='Automl import data for tables', description='')
      _parser.add_argument("--path", dest="path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--dataset-display-name", dest="dataset_display_name", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--api-endpoint", dest="api_endpoint", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])
      _outputs = automl_import_data_for_tables(**_parsed_args)
      if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
          _outputs = [_outputs]
      _output_serializers = [
          _serialize_str,
      ]
      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --path
    - inputValue: path
    - --gcp-project-id
    - inputValue: gcp_project_id
    - --gcp-region
    - inputValue: gcp_region
    - --dataset-display-name
    - inputValue: dataset_display_name
    # The optional flag is only added when api_endpoint is present.
    - if:
        cond:
          isPresent: api_endpoint
        then:
        - --api-endpoint
        - inputValue: api_endpoint
    # One output file path, for the single declared output.
    - '----output-paths'
    - outputPath: dataset_display_name
================================================
FILE: ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_schema_component.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import NamedTuple
def automl_set_dataset_schema(
    gcp_project_id: str,
    gcp_region: str,
    display_name: str,
    target_col_name: str,
    schema_info: str = '{}',  # dict with key of col name, value an array with [type, nullable]
    time_col_name: str = None,
    test_train_col_name: str = None,
    api_endpoint: str = None,
) -> NamedTuple('Outputs', [('display_name', str)]):
    """Adjust column types and set the special columns of a Tables dataset.

    Args:
      gcp_project_id: Project passed to the AutoML Tables client.
      gcp_region: Region passed to the AutoML Tables client.
      display_name: Display name of the dataset to update.
      target_col_name: Display name of the column to set as the target.
      schema_info: JSON object mapping a column display name to
        [type_code, nullable]; nullable may be omitted. An empty string is
        treated like '{}'.
      time_col_name: Optional display name of the time column. Skipped when
        empty.
      test_train_col_name: Optional display name of the test/train split
        column. Skipped when empty.
      api_endpoint: Optional API endpoint override; when empty, the client is
        created without custom client options.

    Returns:
      display_name, unchanged.
    """
    # All imports are local so that the function body is self-contained when
    # it is packaged as a component by func_to_container_op.
    import sys
    import subprocess
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
                    '--no-warn-script-location'],
                   env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
                    '--quiet', '--no-warn-script-location'],
                   env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
    import json
    import google
    import logging
    from google.api_core.client_options import ClientOptions
    from google.cloud import automl_v1beta1 as automl

    def update_column_spec(client,
                           dataset_display_name,
                           column_spec_display_name,
                           type_code,
                           nullable=None):
        """Set the type (and optionally nullability) of one column."""
        logging.info("Setting {} to type {} and nullable {}".format(
            column_spec_display_name, type_code, nullable))
        response = client.update_column_spec(
            dataset_display_name=dataset_display_name,
            column_spec_display_name=column_spec_display_name,
            type_code=type_code,
            nullable=nullable
        )
        logging.info("Table spec updated. {}".format(response))

    def update_dataset(client,
                       dataset_display_name,
                       target_column_spec_name=None,
                       time_column_spec_name=None,
                       test_train_column_spec_name=None):
        """Set the target, time and test/train columns that were provided."""
        if target_column_spec_name:
            response = client.set_target_column(
                dataset_display_name=dataset_display_name,
                column_spec_display_name=target_column_spec_name
            )
            logging.info("Target column updated. {}".format(response))
        if time_column_spec_name:
            response = client.set_time_column(
                dataset_display_name=dataset_display_name,
                column_spec_display_name=time_column_spec_name
            )
            logging.info("Time column updated. {}".format(response))
        # Previously this argument was accepted but never applied.
        if test_train_column_spec_name:
            response = client.set_test_train_column(
                dataset_display_name=dataset_display_name,
                column_spec_display_name=test_train_column_spec_name
            )
            logging.info("Test/train column updated. {}".format(response))

    logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable

    # TODO: we could instead check for region 'eu' and use the
    # 'eu-automl.googleapis.com:443' endpoint in that case, instead of
    # requiring the endpoint to be specified.
    if api_endpoint:
        client_options = ClientOptions(api_endpoint=api_endpoint)
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
                                     client_options=client_options)
    else:
        client = automl.TablesClient(project=gcp_project_id, region=gcp_region)

    # An empty string is not valid JSON; treat it as "no overrides".
    schema_dict = json.loads(schema_info) if schema_info else {}
    # Update cols for which the desired schema was not inferred.
    for col_name, col_spec in (schema_dict or {}).items():
        # nullable is optional in a schema entry.
        nullable = col_spec[1] if len(col_spec) > 1 else None
        update_column_spec(client, display_name, col_name, col_spec[0],
                           nullable=nullable)

    # Update the dataset with info about the target col, plus optionally info
    # on how to split on a time col or a test/train col.
    update_dataset(client, display_name,
                   target_column_spec_name=target_col_name,
                   time_column_spec_name=time_col_name,
                   test_train_column_spec_name=test_train_col_name)
    return display_name
if __name__ == '__main__':
    # Running this file directly (re)writes the component spec
    # tables_schema_component.yaml from the function above.
    from kfp import components

    components.func_to_container_op(
        automl_set_dataset_schema,
        base_image='python:3.7',
        output_component_file='tables_schema_component.yaml',
    )
================================================
FILE: ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_schema_component.yaml
================================================
# Kubeflow Pipelines component spec for adjusting the schema of an AutoML
# Tables dataset. The embedded program corresponds to
# automl_set_dataset_schema in tables_schema_component.py, whose __main__
# block writes this file through kfp.components.func_to_container_op.
#
# The embedded function applies test_train_col_name via
# set_test_train_column; previously the input was accepted but ignored.
name: Automl set dataset schema
inputs:
- name: gcp_project_id
  type: String
- name: gcp_region
  type: String
- name: display_name
  type: String
- name: target_col_name
  type: String
# JSON object: column display name -> [type, nullable].
- name: schema_info
  type: String
  default: '{}'
  optional: true
- name: time_col_name
  type: String
  optional: true
- name: test_train_col_name
  type: String
  optional: true
# Optional endpoint override for the AutoML client.
- name: api_endpoint
  type: String
  optional: true
outputs:
- name: display_name
  type: String
implementation:
  container:
    image: python:3.7
    # The whole program is passed inline to `python3 -u -c`.
    command:
    - python3
    - -u
    - -c
    - |
      from typing import NamedTuple
      def automl_set_dataset_schema(
          gcp_project_id: str,
          gcp_region: str,
          display_name: str,
          target_col_name: str,
          schema_info: str = '{}',  # dict with key of col name, value an array with [type, nullable]
          time_col_name: str = None,
          test_train_col_name: str = None,
          api_endpoint: str = None,
      ) -> NamedTuple('Outputs', [('display_name', str)]):
          import sys
          import subprocess
          subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0',
              '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
          subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0',
              '--quiet', '--no-warn-script-location'],
              env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
          import json
          import google
          import logging
          from google.api_core.client_options import ClientOptions
          from google.cloud import automl_v1beta1 as automl
          def update_column_spec(client,
                                 dataset_display_name,
                                 column_spec_display_name,
                                 type_code,
                                 nullable=None
                                 ):
              logging.info("Setting {} to type {} and nullable {}".format(
                  column_spec_display_name, type_code, nullable))
              response = client.update_column_spec(
                  dataset_display_name=dataset_display_name,
                  column_spec_display_name=column_spec_display_name,
                  type_code=type_code,
                  nullable=nullable
              )
              logging.info("Table spec updated. {}".format(response))
          def update_dataset(client,
                             dataset_display_name,
                             target_column_spec_name=None,
                             time_column_spec_name=None,
                             test_train_column_spec_name=None):
              if target_column_spec_name:
                  response = client.set_target_column(
                      dataset_display_name=dataset_display_name,
                      column_spec_display_name=target_column_spec_name
                  )
                  logging.info("Target column updated. {}".format(response))
              if time_column_spec_name:
                  response = client.set_time_column(
                      dataset_display_name=dataset_display_name,
                      column_spec_display_name=time_column_spec_name
                  )
                  logging.info("Time column updated. {}".format(response))
              # Previously this argument was accepted but never applied.
              if test_train_column_spec_name:
                  response = client.set_test_train_column(
                      dataset_display_name=dataset_display_name,
                      column_spec_display_name=test_train_column_spec_name
                  )
                  logging.info("Test/train column updated. {}".format(response))
          logging.getLogger().setLevel(logging.INFO)  # TODO: make level configurable
          # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint
          # in that case, instead of requiring endpoint to be specified.
          if api_endpoint:
              client_options = ClientOptions(api_endpoint=api_endpoint)
              client = automl.TablesClient(project=gcp_project_id, region=gcp_region,
                  client_options=client_options)
          else:
              client = automl.TablesClient(project=gcp_project_id, region=gcp_region)
          # An empty string is not valid JSON; treat it as "no overrides".
          schema_dict = json.loads(schema_info) if schema_info else {}
          # Update cols for which the desired schema was not inferred.
          for col_name, col_spec in (schema_dict or {}).items():
              # nullable is optional in a schema entry.
              nullable = col_spec[1] if len(col_spec) > 1 else None
              update_column_spec(client, display_name, col_name, col_spec[0],
                                 nullable=nullable)
          # Update the dataset with info about the target col, plus optionally info on how to split on
          # a time col or a test/train col.
          update_dataset(client, display_name,
                         target_column_spec_name=target_col_name,
                         time_column_spec_name=time_col_name,
                         test_train_column_spec_name=test_train_col_name)
          return display_name
      def _serialize_str(str_value: str) -> str:
          if not isinstance(str_value, str):
              raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
          return str_value
      import argparse
      _parser = argparse.ArgumentParser(prog='Automl set dataset schema', description='')
      _parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--display-name", dest="display_name", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--target-col-name", dest="target_col_name", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--schema-info", dest="schema_info", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--time-col-name", dest="time_col_name", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--test-train-col-name", dest="test_train_col_name", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--api-endpoint", dest="api_endpoint", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])
      _outputs = automl_set_dataset_schema(**_parsed_args)
      if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
          _outputs = [_outputs]
      _output_serializers = [
          _serialize_str,
      ]
      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --gcp-project-id
    - inputValue: gcp_project_id
    - --gcp-region
    - inputValue: gcp_region
    - --display-name
    - inputValue: display_name
    - --target-col-name
    - inputValue: target_col_name
    # Each optional flag below is only added when its input is present.
    - if:
        cond:
          isPresent: schema_info
        then:
        - --schema-info
        - inputValue: schema_info
    - if:
        cond:
          isPresent: time_col_name
        then:
        - --time-col-name
        - inputValue: time_col_name
    - if:
        cond:
          isPresent: test_train_col_name
        then:
        - --test-train-col-name
        - inputValue: test_train_col_name
    - if:
        cond:
          isPresent: api_endpoint
        then:
        - --api-endpoint
        - inputValue: api_endpoint
    # One output file path, for the single declared output.
    - '----output-paths'
    - outputPath: display_name
================================================
FILE: ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/Dockerfile
================================================
# Copyright 2020 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM ubuntu:18.04

# Install all OS packages in the same layer as `apt-get update`, so that a
# cached package index is never reused by a later, separate install layer.
# /usr/local/bin/python is linked to python3 because the entrypoint invokes
# plain `python`.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y \
      python3-pip python3-dev wget unzip git \
 && rm -rf /var/lib/apt/lists/* \
 && ln -s /usr/bin/python3 /usr/local/bin/python \
 && pip3 install --upgrade pip

RUN pip install urllib3 certifi retrying

# Install the Cloud SDK under /tools, then add kubectl and disable the
# component update check.
RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \
    unzip -qq google-cloud-sdk.zip -d tools && \
    rm google-cloud-sdk.zip && \
    tools/google-cloud-sdk/install.sh --usage-reporting=false \
        --path-update=false --bash-completion=false \
        --disable-installation-options && \
    tools/google-cloud-sdk/bin/gcloud -q components update \
        gcloud core gsutil && \
    tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \
    tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \
    touch /tools/google-cloud-sdk/lib/third_party/google.py

ENV PATH="${PATH}:/tools/google-cloud-sdk/bin"

# `build` is the staging directory populated by build.sh. COPY is used
# because no archive extraction or URL fetch is needed.
COPY build /ml

ENTRYPOINT ["python", "/ml/exported_model_deploy.py"]
================================================
FILE: ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/build.sh
================================================
#!/bin/bash -e
# Copyright 2020 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -euo pipefail

# Run from the script's own directory so the relative paths below resolve
# no matter where the script is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Project id: first argument if given, otherwise the active gcloud project.
if [ -n "${1:-}" ]
then
  PROJECT_ID="$1"
else
  PROJECT_ID="$(gcloud config config-helper --format "value(configuration.properties.core.project)")"
fi

if [ -z "${PROJECT_ID}" ]
then
  echo "Could not determine a GCP project id; pass one as the first argument." >&2
  exit 1
fi

IMAGE_NAME=model-service-launcher
IMAGE_URI="gcr.io/${PROJECT_ID}/${IMAGE_NAME}"

# Stage the deploy sources into ./build, which the Dockerfile adds to the
# image. The trap removes the staging directory even if a later step fails.
mkdir -p ./build
trap 'rm -rf ./build' EXIT
rsync -arvp "../../deploy_model_for_tables"/ ./build/

docker build -t "${IMAGE_NAME}" .
docker tag "${IMAGE_NAME}" "${IMAGE_URI}"
docker push "${IMAGE_URI}"
================================================
FILE: ml/automl/tables/kfp_e2e/tables_pipeline_caip.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.components as comp
import json
import time
# Default schema overrides, as a JSON string: each listed column is mapped to
# ["CATEGORY", True], i.e. [type, nullable].
DEFAULT_SCHEMA = json.dumps({
    column: ["CATEGORY", True]
    for column in ("end_station_id", "start_station_id", "loc_cross", "bike_id")
})

# Component factories, loaded from spec files addressed relative to the
# current working directory.
create_dataset_op = comp.load_component_from_file(
    './create_dataset_for_tables/tables_component.yaml')
import_data_op = comp.load_component_from_file(
    './import_data_from_bigquery/tables_component.yaml')
set_schema_op = comp.load_component_from_file(
    './import_data_from_bigquery/tables_schema_component.yaml')
train_model_op = comp.load_component_from_file(
    './create_model_for_tables/tables_component.yaml')
eval_model_op = comp.load_component_from_file(
    './create_model_for_tables/tables_eval_component.yaml')
eval_metrics_op = comp.load_component_from_file(
    './create_model_for_tables/tables_eval_metrics_component.yaml')
deploy_model_op = comp.load_component_from_file(
    './deploy_model_for_tables/tables_deploy_component.yaml')
@dsl.pipeline(
    name='AutoML Tables',
    description='Demonstrate an AutoML Tables workflow'
)
def automl_tables(  #pylint: disable=unused-argument
    gcp_project_id: str = 'YOUR_PROJECT_HERE',
    gcp_region: str = 'us-central1',
    dataset_display_name: str = 'YOUR_DATASET_NAME',
    api_endpoint: str = '',
    path: str = 'bq://aju-dev-demos.london_bikes_weather.bikes_weather',
    target_col_name: str = 'duration',
    time_col_name: str = '',
    # test_train_col_name: str = '',
    # JSON dict with col name as key and [type, nullable] as value
    schema_info: str = DEFAULT_SCHEMA,
    train_budget_milli_node_hours: 'Integer' = 1000,
    model_prefix: str = 'bwmodel',
    # one of strings: [MAXIMIZE_AU_ROC, MAXIMIZE_AU_PRC, MINIMIZE_LOG_LOSS, MAXIMIZE_RECALL_AT_PRECISION, MAXIMIZE_PRECISION_AT_RECALL, MINIMIZE_RMSE, MINIMIZE_MAE, MINIMIZE_RMSLE]
    optimization_objective: str = '',  # if not set, will use default
    include_column_spec_names: str = '',
    exclude_column_spec_names: str = '',
    bucket_name: str = 'YOUR_BUCKET_NAME',
    thresholds: str = '{"mean_absolute_error": 480}',
):
    # Pipeline steps, in order: create dataset -> import data -> set schema ->
    # train -> evaluate -> check metrics -> deploy (only when the metrics step
    # outputs 'deploy').
    # include_column_spec_names / exclude_column_spec_names are accepted but
    # not passed to any step here.

    # Arguments that every AutoML Tables step receives.
    automl_args = {
        'gcp_project_id': gcp_project_id,
        'gcp_region': gcp_region,
        'api_endpoint': api_endpoint,
    }

    dataset_step = create_dataset_op(
        dataset_display_name=dataset_display_name,
        **automl_args
    )

    import_step = import_data_op(
        dataset_display_name=dataset_display_name,
        path=path,
        **automl_args
    )

    schema_step = set_schema_op(
        display_name=dataset_display_name,
        target_col_name=target_col_name,
        schema_info=schema_info,
        time_col_name=time_col_name,
        # test_train_col_name=test_train_col_name
        **automl_args
    )

    # These steps share no outputs, so their order is declared explicitly.
    import_step.after(dataset_step)
    schema_step.after(import_step)

    training_step = train_model_op(
        dataset_display_name=dataset_display_name,
        model_prefix=model_prefix,
        train_budget_milli_node_hours=train_budget_milli_node_hours,
        optimization_objective=optimization_objective,
        **automl_args
    )
    training_step.after(schema_step)

    trained_model_name = training_step.outputs['model_display_name']

    evaluation_step = eval_model_op(
        bucket_name=bucket_name,
        # gcs_path='automl_evals/{}'.format(dsl.RUN_ID_PLACEHOLDER),
        gcs_path='automl_evals/{}'.format(trained_model_name),
        model_display_name=trained_model_name,
        **automl_args
    )

    metrics_step = eval_metrics_op(
        thresholds=thresholds,
        eval_data=evaluation_step.outputs['eval_data'],
    )

    # Deploy only when the metrics check says so.
    with dsl.Condition(metrics_step.outputs['deploy'] == 'deploy'):
        deploy_model_op(
            model_display_name=trained_model_name,
            **automl_args
        )
if __name__ == '__main__':
    # Compile the pipeline into an archive named after this file
    # (<this file>.tar.gz).
    from kfp import compiler

    archive_path = __file__ + '.tar.gz'
    compiler.Compiler().compile(automl_tables, archive_path)
================================================
FILE: ml/automl/tables/kfp_e2e/tables_pipeline_kf.py
================================================
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.components as comp
import json
import time
# Default value for the pipeline's `schema_info` parameter: a JSON object in
# which every listed column name maps to the pair ["CATEGORY", true].
DEFAULT_SCHEMA = json.dumps({
    column_name: ["CATEGORY", True]
    for column_name in (
        "end_station_id",
        "start_station_id",
        "loc_cross",
        "bike_id",
    )
})

# Step factories for the pipeline, each loaded from a component definition
# file. The './' paths are resolved against the current working directory.
create_dataset_op = comp.load_component_from_file(
    './create_dataset_for_tables/tables_component.yaml')
import_data_op = comp.load_component_from_file(
    './import_data_from_bigquery/tables_component.yaml')
set_schema_op = comp.load_component_from_file(
    './import_data_from_bigquery/tables_schema_component.yaml')
train_model_op = comp.load_component_from_file(
    './create_model_for_tables/tables_component.yaml')
eval_model_op = comp.load_component_from_file(
    './create_model_for_tables/tables_eval_component.yaml')
eval_metrics_op = comp.load_component_from_file(
    './create_model_for_tables/tables_eval_metrics_component.yaml')
deploy_model_op = comp.load_component_from_file(
    './deploy_model_for_tables/tables_deploy_component.yaml')
@dsl.pipeline(
    name='AutoML Tables',
    description='Demonstrate an AutoML Tables workflow'
)
def automl_tables( #pylint: disable=unused-argument
    gcp_project_id: str = 'YOUR_PROJECT_HERE',
    gcp_region: str = 'us-central1',
    dataset_display_name: str = 'YOUR_DATASET_NAME',
    api_endpoint: str = '',
    path: str = 'bq://aju-dev-demos.london_bikes_weather.bikes_weather',
    target_col_name: str = 'duration',
    time_col_name: str = '',
    # test_train_col_name: str = '',
    # schema dict with col name as key, type as value
    schema_info: str = DEFAULT_SCHEMA,
    train_budget_milli_node_hours: 'Integer' = 1000,
    model_prefix: str = 'bwmodel',
    # one of strings: [MAXIMIZE_AU_ROC, MAXIMIZE_AU_PRC, MINIMIZE_LOG_LOSS, MAXIMIZE_RECALL_AT_PRECISION, MAXIMIZE_PRECISION_AT_RECALL, MINIMIZE_RMSE, MINIMIZE_MAE, MINIMIZE_RMSLE]
    optimization_objective: str = '',  # if not set, will use default
    include_column_spec_names: str = '',
    exclude_column_spec_names: str = '',
    bucket_name: str = 'YOUR_BUCKET_NAME',
    thresholds: str = '{"mean_absolute_error": 480}',
):
  # Pipeline layout: create dataset -> import data -> set schema -> train
  # model -> evaluate model -> compute eval metrics -> conditional deploy.
  # `include_column_spec_names` and `exclude_column_spec_names` are accepted
  # as pipeline parameters but are not passed to any step here.

  def _with_sa(step):
    # Every step gets the 'user-gcp-sa' GCP secret applied.
    return step.apply(gcp.use_gcp_secret('user-gcp-sa'))

  create_dataset = _with_sa(create_dataset_op(
      gcp_project_id=gcp_project_id,
      gcp_region=gcp_region,
      dataset_display_name=dataset_display_name,
      api_endpoint=api_endpoint,
  ))

  import_data = _with_sa(import_data_op(
      gcp_project_id=gcp_project_id,
      gcp_region=gcp_region,
      dataset_display_name=dataset_display_name,
      api_endpoint=api_endpoint,
      path=path
  ))
  # No outputs are passed between these early steps, so the ordering is
  # declared explicitly with .after().
  import_data.after(create_dataset)

  set_schema = _with_sa(set_schema_op(
      gcp_project_id=gcp_project_id,
      gcp_region=gcp_region,
      display_name=dataset_display_name,
      api_endpoint=api_endpoint,
      target_col_name=target_col_name,
      schema_info=schema_info,
      time_col_name=time_col_name
      # test_train_col_name=test_train_col_name
  ))
  set_schema.after(import_data)

  train_model = _with_sa(train_model_op(
      gcp_project_id=gcp_project_id,
      gcp_region=gcp_region,
      dataset_display_name=dataset_display_name,
      api_endpoint=api_endpoint,
      model_prefix=model_prefix,
      train_budget_milli_node_hours=train_budget_milli_node_hours,
      optimization_objective=optimization_objective
  ))
  train_model.after(set_schema)

  # The training step's output name is reused for the eval GCS path, the
  # eval step, and the deploy step.
  trained_model_name = train_model.outputs['model_display_name']

  eval_model = _with_sa(eval_model_op(
      gcp_project_id=gcp_project_id,
      gcp_region=gcp_region,
      bucket_name=bucket_name,
      gcs_path='automl_evals/{}'.format(trained_model_name),
      api_endpoint=api_endpoint,
      model_display_name=trained_model_name
  ))

  eval_metrics = _with_sa(eval_metrics_op(
      thresholds=thresholds,
      eval_data=eval_model.outputs['eval_data'],
  ))

  # Deploy only when the metrics step's 'deploy' output equals 'deploy'.
  with dsl.Condition(eval_metrics.outputs['deploy'] == 'deploy'):
    deploy_model = _with_sa(deploy_model_op(
        gcp_project_id=gcp_project_id,
        gcp_region=gcp_region,
        api_endpoint=api_endpoint,
        model_display_name=trained_model_name,
    ))
if __name__ == '__main__':
  # When run as a script, compile the pipeline function into an archive whose
  # path is this file's path with '.tar.gz' appended.
  import kfp.compiler as compiler

  pipeline_package = __file__ + '.tar.gz'
  compiler.Compiler().compile(automl_tables, pipeline_package)
================================================
FILE: ml/automl/tables/model_export/Dockerfile.template
================================================
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Base image: the AutoML Tables model server.
FROM gcr.io/cloud-automl-tables-public/model_server
# Add the exported model directory into the image at /models/default/0000001.
# Replace YOUR_RENAMED_DIRECTORY with the name of your own export directory.
ADD model-export/tbl/YOUR_RENAMED_DIRECTORY /models/default/0000001
================================================
FILE: ml/automl/tables/model_export/automl_tables_model_export_cloud_run.md
================================================
# AutoML Tables: Exporting and serving your trained model to Cloud Run
## Introduction
Google Cloud’s [AutoML Tables][1] lets you automatically build and deploy state-of-the-art machine learning models using your own structured data.
Recently, Tables launched a feature to let you [export][2] your full custom model, packaged such that you can serve it via a Docker container. This lets you serve your models anywhere that you can run a container.
In this example, we'll show how you can package an exported Tables model to serve on [Cloud Run][3]. With Cloud Run, your model serving automatically scales out with traffic, and scales to 0 when it’s not being used. We’ll also show how you can examine your trained custom model in [TensorBoard][4].
For the following steps, the [documentation][5] provides more detail.
We'll use the [Cloud Console UI][6], but all of these steps could also be accomplished by accessing the API via the command line or using the [AutoML Tables client libraries][7].
## Create a Dataset and edit its schema
The first step in training a Tables model is to create a *dataset*, using your data.
For this example, we'll use data that is essentially a join of the [London Bikes][8] and [NOAA weather][9] public datasets in BigQuery, with some additional processing to derive additional GIS and day-of-week fields. (If you like, you can follow along with your own tabular dataset instead, but you will need to construct your own prediction instances as well).
Visit the [Tables page][10] in the Cloud Console, and enable the API as necessary.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/enable_api.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/enable_api.png" width="40%"/></a>
<figcaption><br/><i>Enable the AutoML Tables API.</i></figcaption>
</figure>
Then, create a new Tables *dataset*.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/create_dataset.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/create_dataset.png" width="50%"/></a>
<figcaption><br/><i>Create a new Tables dataset.</i></figcaption>
</figure>
Import your data into the dataset. To ingest the example data, select "Import data from BigQuery". Then, as shown in the figure below, use `aju-dev-demos` as the BigQuery Project ID, `london_bikes_weather` as the dataset ID, and `bikes_weather` as the table name.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/import_data.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/import_data.png" width="60%"/></a>
<figcaption><br/><i>Import the <code>bikes_weather</code> BigQuery table into the dataset.</i></figcaption>
</figure>
### Edit the dataset’s schema
Once the import is complete, edit the dataset schema. We'll need to change a few of the inferred types. Make sure your schema reflects that in the figure below. In particular, change `bike_id`, `end_station_id`, `start_station_id`, and `loc_cross` to be of type *Categorical*. (Note that useful stats are generated for the columns).
Then, we'll set `duration` as the _target_ column.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/schema.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/schema.png" width="90%"/></a>
<figcaption><br/><i>Adjust the dataset schema.</i></figcaption>
</figure>
## Train and export your Tables model
Now you're ready to train a model on that dataset. After the model is trained, you will be able not only to deploy it to the Cloud AI Platform, where you can access it via API, but also to *export* it to run in a container.
### Train the model
We'll train a model to predict ride `duration` given all the other dataset inputs. So, we'll be training a [regression][11] model.
For this example, enter a training budget of 1 hour, and include all the other feature columns.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/train.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/train.png" width="50%"/></a>
<figcaption><br/><i>Train a model to predict ride <code>duration</code>.</i></figcaption>
</figure>
### Export the trained model
Once the model is trained, we'll export the result, so that it can be served from any environment in which you can run a container. (Note that you could also [deploy][12] your model to the Cloud AI Platform for online prediction).
You'll find the export option under **TEST & USE**. (See the [documentation][13] for detail on the export process).
You'll need to create a *regional* GCS bucket, in the same region as your model. You also might want to create a sub-folder for the model export in the GCS bucket, so that if you have multiple exports, you can keep track of them. An easy way to create the folder is via the web UI. Here, I've created a `model_export_1` sub-folder.
Click the "Container" card to export your trained model to be run from a Docker container.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/export1.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/export1.png" width="60%"/></a>
<figcaption><br/><i>Click the "Container" card to export your trained model to be run from a Docker container.</i></figcaption>
</figure>
Browse to select the GCS folder into which you want to export your model, then click the **EXPORT** button.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/export2-2.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/export2-2.png" width="60%"/></a>
<figcaption><br/><i>Browse to the GCS folder into which you want to export your model.</i></figcaption>
</figure>
When the export is finished, create a local directory to hold your model, e.g. something like `bikes_weather`.
Copy the download command in the cloud console, which will look something like the following:
`gsutil cp -r gs://<your-bucket>/model_export_1//* ./download_dir`
Edit this command: add quotes around the `gs` URI, and remove one of the end slashes. Then edit `download_dir` to point to the directory you created. The result should look something like the following. Run it from the parent directory of your `bikes_weather` directory:
```sh
gsutil cp -r 'gs://<your-bucket>/model_export_1/*' ./bikes_weather
```
The exported model will be copied to `./bikes_weather`.
**Note**: to run `gsutil`, you will need [`gcloud`][14] installed. You can run these commands from the [Cloud Shell][15] instead of your local machine if you don't want to install the SDK locally.
## Test your exported model locally
Once you've downloaded your model, you can run and test it locally. This provides a good sanity check before deploying to Cloud Run.
The process is described in the [documentation][16]; we'll summarize here.
- change to the `bikes_weather` directory (or whatever you named it). You should see a `model-export` subdirectory, the result of your download.
- rename the subdirectory as described in the [documentation][17], to remove the timestamp suffix.
Then, create and run a container to serve your new trained model. Edit the following to point to your renamed directory path:
```sh
docker run -v `pwd`/model-export/tbl/<your_renamed_directory>:/models/default/0000001 -p 8080:8080 -it gcr.io/cloud-automl-tables-public/model_server
```
This starts up a model server to which you can send requests. Note that we're using the `gcr.io/cloud-automl-tables-public/model_server` container image and mounting our local directory.
Next, [download this `instances.json`][18] file. If you take a look at it, you can see that it holds data for three prediction instances.
From the directory where you placed `instances.json`, run:
```sh
curl -X POST --data @instances.json http://localhost:8080/predict
```
It may take a second or two for the first request to return, but subsequent requests will be faster. You’ll get back predictions for all of the instances in the `json` file.
The actual duration for the third instance is 1200.
## View information about your exported model in TensorBoard
You can view your exported custom model in [TensorBoard][19]. This requires a conversion step.
You will need to have TensorFlow 1.14 or 1.15 installed to run the conversion script.
Then, download [this script][20], e.g. via `curl -O https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/automl/tables/model_export/convert_oss.py`, to the parent directory of `model-export`. Create a directory for the output (e.g. `converted_export`), then run the script as follows:
```sh
mkdir converted_export
python ./convert_oss.py --saved_model ./model-export/tbl/<your_renamed_directory>/saved_model.pb --output_dir converted_export
```
Then, point TensorBoard to the converted model graph:
```sh
tensorboard --logdir=converted_export
```
You will see a rendering of the model graph, and can pan and zoom to view model sub-graphs in more detail.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/tb1.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/tb1.png" /></a>
<figcaption><br/><i>You can view an exported custom Tables model in Tensorboard.</i></figcaption>
</figure>
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/tb2.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/tb2.png" /></a>
<figcaption><br/><i></i></figcaption>
</figure>
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/tables_export/tb3.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/tables_export/tb3.png" /></a>
<figcaption><br/><i>Zooming in to see part of the model graph in more detail.</i></figcaption>
</figure>
## Create a Google Cloud Run service based on your exported model
At this point, we have a trained model that we've exported and tested locally. Now we are almost ready to deploy it to [Cloud Run][21].
As the last step of prep, we'll create a container image that uses `gcr.io/cloud-automl-tables-public/model_server` as a base image and `ADD`s the model directory, and push that image to the [Google Container Registry][22], so that Cloud Run can access it.
### Build a container to use for Cloud Run
In the same `bikes_weather` directory that holds the `model-export` subdir, create a file called `Dockerfile` that contains the following two lines. The template is [here][23] as well; **edit the second line to use your correct path to the exported model, the same path that you used above when running locally**.
```
FROM gcr.io/cloud-automl-tables-public/model_server
ADD model-export/tbl/YOUR_RENAMED_DIRECTORY /models/default/0000001
```
Then, build a container from the `Dockerfile`. In this example we'll call it `bw-serve`.
You can do this as follows (**replace `[PROJECT_ID]` with the id of your project**):
```
docker build . -t gcr.io/[PROJECT_ID]/bw-serve
```
Then push it to the Google Container Registry (again replacing `[PROJECT_ID]` with the id of your project):
```
docker push gcr.io/[PROJECT_ID]/bw-serve
```
(If you get an error, you may need to configure Docker to use gcloud to [authenticate requests to Container Registry][24].)
Alternately, you can use [Cloud Build][25] to build the container instead, as follows:
```
gcloud builds submit --tag gcr.io/[PROJECT_ID]/bw-serve .
```
### Create your Cloud Run service
Now we're ready to deploy the container we built to Cloud Run, where we can scalably serve it for predictions. Visit the [Cloud Run page in the console][26]. (Click the “START USING..” button if necessary). Then click the **CREATE SERVICE** button.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/cloud_run1%202.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/cloud_run1%202.png" width="40%"/></a>
<figcaption><br/><i>Creating a Cloud Run Service</i></figcaption>
</figure>
For the container URL, enter the name of the container that you just built above. Select the “Cloud Run (fully managed)” option. Create a service name (it can be anything you like). Select the **Require Authentication** option.
Then, click on **SHOW OPTIONAL REVISION SETTINGS**. Change the **Memory allocated** option to **2GiB**.
Leave the rest of the defaults as they are, and click **CREATE**.
<figure>
<a href="https://storage.googleapis.com/amy-jo/images/automl/cloud_run2.png" target="_blank"><img src="https://storage.googleapis.com/amy-jo/images/automl/cloud_run2.png" width="50%"/></a>
<figcaption><br/><i>Set your service instances to use 2GiB of memory</i></figcaption>
</figure>
### Send prediction requests to the Cloud Run service
Once your Cloud Run service is deployed, you can send prediction requests to it. Your new service will have a URL that starts with your service name (and ends with `run.app`). You can send JSON predictions to the Cloud Run service just as with the local server you tested earlier; but with Cloud Run, the service will scale up and down based on demand.
Assuming you selected the **Require Authentication** option, you can make prediction requests like this:
```bash
curl -X POST -H \
"Authorization: Bearer $(gcloud auth print-identity-token)" --data @./instances.json \
https://<your-service-url>/predict
```
(If you set up your Cloud Run service endpoint so that it does not require authentication, you don’t need to include the authorization header in your `curl` request).
## What’s next?
In this post, we walked through how to export a custom AutoML Tables trained model, view model information in TensorBoard, and build a container image that lets you serve the model from any environment. Then we showed how you can deploy that image to Cloud Run for scalable serving.
Once you’ve built a model-serving container image, it’s easy to deploy it to other environments as well. For example, if you have installed [Knative serving][27] on a [Kubernetes][28] cluster, you can create a Knative *service* like this, using the same container image (again replacing `[PROJECT_ID]` with your project):
```yaml
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: bikes-weather
spec:
  template:
    spec:
      containers:
        - image: gcr.io/[PROJECT_ID]/bw-serve
```
(While our example model fits on a 2GiB Cloud Run instance, it’s possible that some of your other models may be too large for the managed Cloud Run service; in that case, serving them via Kubernetes/[GKE][29] is a good alternative).
If you’re curious about the details of your custom model, you can use Stackdriver Logging to [view information about your AutoML Tables model][30]. Using Logging, you can see the final model hyperparameters as well as the hyperparameters and objective values used during model training and tuning.
You may also be interested in exploring the updated [AutoML Tables client libraries][31], which make it easy for you to [train and use Tables programmatically][32], or reading about how to create a _contextual bandit_ model pipeline [using AutoML Tables, without needing a specialist for tuning or feature engineering][33].
[1]: https://cloud.google.com/automl-tables/docs/
[2]: https://cloud.google.com/automl-tables/docs/model-export
[3]: https://cloud.google.com/run/docs/
[4]: https://www.tensorflow.org/tensorboard
[5]: https://cloud.google.com/automl-tables/docs/
[6]: https://console.cloud.google.com/automl-tables/datasets
[7]: https://googleapis.dev/python/automl/latest/gapic/v1beta1/tables.html
[8]: https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=london_bicycles&page=dataset
[9]: https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=noaa_gsod&page=dataset
[10]: https://console.cloud.google.com/automl-tables/datasets
[11]: https://cloud.google.com/automl-tables/docs/problem-types
[12]: https://cloud.google.com/automl-tables/docs/predict
[13]: https://cloud.google.com/automl-tables/docs/model-export
[14]: https://cloud.google.com/sdk/install
[15]: https://cloud.google.com/shell/
[16]: https://cloud.google.com/automl-tables/docs/model-export
[17]: https://cloud.google.com/automl-tables/docs/model-export#run-server
[18]: https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/automl/tables/model_export/instances.json
[19]: https://www.tensorflow.org/tensorboard
[20]: https://github.com/amygdala/code-snippets/blob/master/ml/automl/tables/model_export/convert_oss.py
[21]: https://cloud.google.com/run/docs/
[22]: https://cloud.google.com/container-registry/
[23]: https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/automl/tables/model_export/Dockerfile.template
[24]: https://cloud.google.com/container-registry/docs/quickstart#add_the_image_to
[25]: https://cloud.google.com/cloud-build/docs/quickstart-docker
[26]: https://console.cloud.google.com/marketplace/details/google-cloud-platform/cloud-run
[27]: https://github.com/knative/serving
[28]: https://kubernetes.io/
[29]: https://cloud.google.com/kubernetes-engine/
[30]: https://cloud.google.com/automl-tables/docs/logging
[31]: https://googleapis.dev/python/automl/latest/gapic/v1beta1/tables.html
[32]: https://github.com/GoogleCloudPlatform/python-docs-samples/tree/master/tables/automl/notebooks
[33]: https://cloud.google.com/blog/products/ai-machine-learning/how-to-build-better-contextual-bandits-machine-learning-models
================================================
FILE: ml/automl/tables/model_export/convert_oss.py
================================================
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# tested with TF1.14
import sys
import tensorflow as tf
from absl import app
from absl import flags
from tensorflow.core.protobuf import saved_model_pb2
from tensorflow.python.summary import summary
FLAGS = flags.FLAGS

# Command-line flags; both default to the empty string.
flags.DEFINE_string(
    name='saved_model',
    default='',
    help='The location of the saved_model.pb to visualize.')
flags.DEFINE_string(
    name='output_dir',
    default='',
    help='The location for the Tensorboard log to begin visualization from.')
def import_to_tensorboard(saved_model, output_dir):
  """View an imported saved_model.pb as a graph in Tensorboard.

  Parses the serialized SavedModel protocol buffer at `saved_model`, takes its
  single meta graph, and writes that graph to a Tensorboard log in
  `output_dir`. If the file does not contain exactly one meta graph, a message
  is printed and the process exits with status 1.

  Args:
    saved_model: The location of the saved_model.pb to visualize.
    output_dir: The location for the Tensorboard log to begin visualization from.

  Usage:
    Call this function with your model location and desired log directory.
    Launch Tensorboard by pointing it to the log directory.
    View your imported `.pb` model as a graph.
  """
  # Only the read/parse needs the file handle; release it before writing.
  with open(saved_model, "rb") as f:
    sm = saved_model_pb2.SavedModel()
    sm.ParseFromString(f.read())

  # Zero graphs is as unusable as several, so report the actual count.
  num_graphs = len(sm.meta_graphs)
  if num_graphs != 1:
    print('Expected exactly one graph in {}, but found {}. '
          'Not sure which to write'.format(saved_model, num_graphs))
    sys.exit(1)

  graph_def = sm.meta_graphs[0].graph_def
  pb_visual_writer = summary.FileWriter(output_dir)
  try:
    pb_visual_writer.add_graph(None, graph_def=graph_def)
  finally:
    # Close the writer so the graph event is flushed to disk and the event
    # file is released, even if add_graph raises.
    pb_visual_writer.close()

  print("Model Imported. Visualize by running: "
        "tensorboard --logdir={}".format(output_dir))
def main(argv):
  """Run the conversion using the --saved_model and --output_dir flag values."""
  del argv  # Unused; inputs come from FLAGS.
  model_path = FLAGS.saved_model
  log_dir = FLAGS.output_dir
  import_to_tensorboard(model_path, log_dir)


if __name__ == '__main__':
  app.run(main)
================================================
FILE: ml/automl/tables/model_export/instances.json
================================================
{
"instances": [
{
"bike_id": "6179",
"day_of_week": "6",
"end_latitude": 51.50379168,
"end_longitude": -0.11282408,
"end_station_id": "154",
"euclidean": 2513.254047872678,
"loc_cross": "POINT(-0.08 51.52)POINT(-0.11 51.5)",
"max": 56.8,
"min": 50.9,
"prcp": 0,
"ts": 1445624280,
"start_latitude": 51.51615461,
"start_longitude": -0.082422399,
"start_station_id": "217",
"temp": 54,
"dewp": 44
},
{
"bike_id": "5373",
"day_of_week": "3",
"end_latitude": 51.52059681,
"end_longitude": -0.116688468,
"end_station_id": "68",
"euclidean": 1181.215448450556,
"loc_cross": "POINT(-0.13 51.53)POINT(-0.12 51.52)",
"max": 56.7,
"min": 45.9,
"prcp": 0,
"ts": 1494317220,
"start_latitude": 51.52683806,
"start_longitude": -0.130504336,
"start_station_id": "214",
"temp": 50.5,
"dewp": 37.1
},
{
"bike_id": "5373",
"day_of_week": "3",
"end_latitude": 51.52059681,
"end_longitude": -0.116688468,
"end_station_id": "68",
"euclidean": 3589.5146210024977,
"loc_cross": "POINT(-0.07 51.52)POINT(-0.12 51.52)",
"max": 44.6,
"min": 34.0,
"prcp": 0,
"ts": 1480407420,
"start_latitude": 51.52388,
"start_longitude": -0.065076,
"start_station_id": "445",
"temp": 38.2,
"dewp": 28.6
}
]
}
================================================
FILE: ml/automl/tables/xai/README.md
================================================
# AutoML Tables examples
This directory contains a notebook that shows examples of using the [AutoML Tables](https://cloud.google.com/automl-tables/docs/) client library. For these examples, we’ll use data that is essentially a join of two public datasets stored in [BigQuery](https://cloud.google.com/bigquery/): [London Bike rentals](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=london_bicycles&page=dataset) and [NOAA weather data](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=noaa_gsod&page=dataset), with some additional processing to clean up outliers and derive additional GIS and day-of-week fields.
The [automl_tables_xai.ipynb notebook](automl_tables_xai.ipynb) shows how to create a custom [AutoML Tables](https://cloud.google.com/automl-tables/docs/) model to predict duration of London bike rentals given information about local weather as well as info about the rental trip. It walks through examples of using the Tables client libraries for creating a dataset, training a custom model, deploying the model, and using it to make predictions; and shows how you can programmatically request *local feature importance* explanations.
AutoML Tables allows you to [export a model's test dataset to BigQuery](https://cloud.google.com/automl-tables/docs/evaluate#downloading_your_test_dataset_to) after training. The [bigquery_examples.md](bigquery_examples.md) file shows some examples of how you can use BigQuery to analyze this dataset.
================================================
FILE: ml/automl/tables/xai/automl_tables_xai.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Introduction\n",
"\n",
"Google Cloud’s [AutoML Tables](https://cloud.google.com/automl-tables/docs/) lets you automatically build and deploy state-of-the-art machine learning models using your own structured data. \n",
"\n",
"AutoML Tables now has an easier-to-use [Tables-specific Python client library](https://googleapis.dev/python/automl/latest/gapic/v1beta1/tables.html), \n",
"as well as a new ability to **explain** online prediction results— called *local feature importance*— which gives visibility into how the features in a specific prediction request informed the resulting prediction.\n",
"\n",
"In this notebook, we'll create a custom Tables model to predict duration of London bike rentals given information about local weather as well as info about the rental trip.\n",
"We'll walk through examples of using the Tables client libraries for creating a dataset, training a custom model, deploying the model, and using it to make predictions; and show how you can programmatically request local feature importance information.\n",
"\n",
"We recommend running this notebook using [AI Platform Notebooks](https://cloud.google.com/ai-platform-notebooks/).\n",
"If you want to run the notebook on [colab](https://colab.research.google.com/) (or locally), it's possible, but you'll need to do a bit more setup. See the Appendix section of this notebook for details."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Before you begin\n",
"\n",
"Follow the [AutoML Tables documentation](https://cloud.google.com/automl-tables/docs/) to:\n",
"\n",
"- [Select or create a GCP project](https://console.cloud.google.com/cloud-resource-manager).\n",
"- [Make sure that billing is enabled](https://cloud.google.com/billing/docs/how-to/modify-project) for your project\n",
"- Enable the [Cloud AutoML and Storage APIs](https://console.cloud.google.com/flows/enableapi?apiid=storage-component.googleapis.com,automl.googleapis.com,storage-api.googleapis.com).\n",
"- (Recommended) Create an [AI Platform Notebook](https://cloud.google.com/ai-platform-notebooks/) instance and upload this notebook to it.\n",
"\n",
"(See also the [Quickstart guide](https://cloud.google.com/automl-tables/docs/quickstart) for a getting-started walkthrough on AutoML Tables).\n",
"\n",
"Then, install the AutoML Python client libraries into your notebook environment:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip3 install -U google-cloud-automl"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You may need to **restart your notebook kernel** after running the above to pick up the installation."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Enter your GCP project ID in the cell below, then run the cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"PROJECT_ID = \"<your-project-id>\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Do some imports\n",
"\n",
"Next, import some libraries and set some variables."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import argparse\n",
"import os\n",
"from google.api_core.client_options import ClientOptions\n",
"from google.cloud import automl_v1beta1 as automl\n",
"import google.cloud.automl_v1beta1.proto.data_types_pb2 as data_types"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"REGION = 'us-central1'\n",
"DATASET_NAME = 'bikes-weather'\n",
"BIGQUERY_PROJECT_ID = 'aju-dev-demos'\n",
"DATASET_ID = 'london_bikes_weather'\n",
"TABLE_ID = 'bikes_weather'\n",
"IMPORT_URI = 'bq://%s.%s.%s' % (BIGQUERY_PROJECT_ID, DATASET_ID, TABLE_ID)\n",
"print(IMPORT_URI)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DATASET_NAME = 'bikes_weather'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a dataset, and import data\n",
"\n",
"Next, we'll define some utility functions to create a dataset, and to import data into a dataset. The `client.import_data()` call returns an operation *future* that can be used to check for completion synchronously or asynchronously; in this case, we wait synchronously."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def create_dataset(client, dataset_display_name):\n",
" \"\"\"Create a dataset.\"\"\"\n",
"\n",
" # Create a dataset with the given display name\n",
" dataset = client.create_dataset(dataset_display_name)\n",
"\n",
" # Display the dataset information.\n",
" print(\"Dataset name: {}\".format(dataset.name))\n",
" print(\"Dataset id: {}\".format(dataset.name.split(\"/\")[-1]))\n",
" print(\"Dataset di
gitextract_gz1rnp0c/
├── LICENSE
├── README.md
├── cloud_run/
│ └── twilio_vision/
│ ├── Dockerfile
│ ├── README.md
│ └── src/
│ ├── requirements.txt
│ └── whats_that.py
├── datalab/
│ └── facets/
│ ├── README.md
│ └── facets_snippets.ipynb
└── ml/
├── README.md
├── automl/
│ └── tables/
│ ├── kfp_e2e/
│ │ ├── README.md
│ │ ├── create_dataset_for_tables/
│ │ │ ├── tables_component.py
│ │ │ └── tables_component.yaml
│ │ ├── create_model_for_tables/
│ │ │ ├── tables_component.py
│ │ │ ├── tables_component.yaml
│ │ │ ├── tables_eval_component.py
│ │ │ ├── tables_eval_component.yaml
│ │ │ ├── tables_eval_metrics_component.py
│ │ │ └── tables_eval_metrics_component.yaml
│ │ ├── deploy_model_for_tables/
│ │ │ ├── convert_oss.py
│ │ │ ├── exported_model_deploy.py
│ │ │ ├── instances.json
│ │ │ ├── model_serve_template.yaml
│ │ │ ├── tables_deploy_component.py
│ │ │ └── tables_deploy_component.yaml
│ │ ├── import_data_from_bigquery/
│ │ │ ├── tables_component.py
│ │ │ ├── tables_component.yaml
│ │ │ ├── tables_schema_component.py
│ │ │ └── tables_schema_component.yaml
│ │ ├── tables_containers/
│ │ │ └── model-service-launcher/
│ │ │ ├── Dockerfile
│ │ │ └── build.sh
│ │ ├── tables_pipeline_caip.py
│ │ └── tables_pipeline_kf.py
│ ├── model_export/
│ │ ├── Dockerfile.template
│ │ ├── automl_tables_model_export_cloud_run.md
│ │ ├── convert_oss.py
│ │ └── instances.json
│ └── xai/
│ ├── README.md
│ ├── automl_tables_xai.ipynb
│ └── bigquery_examples.md
├── census_train_and_eval/
│ ├── README.md
│ ├── config_custom_gpus.yaml
│ ├── hptuning_config.yaml
│ ├── test.json
│ ├── trainer/
│ │ ├── __init__.py
│ │ ├── model.py
│ │ └── task.py
│ └── using_tf.estimator.train_and_evaluate.ipynb
├── kubeflow-pipelines/
│ ├── README.md
│ ├── README_github_summ.md
│ ├── README_taxidata_examples.md
│ ├── components/
│ │ ├── README.md
│ │ ├── automl/
│ │ │ ├── container/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── dataset_train/
│ │ │ └── dataset_model.py
│ │ ├── cmle/
│ │ │ ├── containers/
│ │ │ │ ├── base/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── cmle_deploy/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── deploy/
│ │ │ └── deploy_model.py
│ │ └── older/
│ │ ├── dataflow/
│ │ │ ├── containers/
│ │ │ │ ├── base/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ ├── tfma/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── tft/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── taxi_schema/
│ │ │ │ └── taxi_schema/
│ │ │ │ ├── __init__.py
│ │ │ │ └── taxi_schema.py
│ │ │ ├── tfma/
│ │ │ │ ├── analysis/
│ │ │ │ │ └── setup.py
│ │ │ │ ├── model_analysis-taxi.py
│ │ │ │ └── tfma_expers.ipynb
│ │ │ └── tft/
│ │ │ ├── mcsv_coder.py
│ │ │ ├── preprocessing.py
│ │ │ ├── preprocessing2.py
│ │ │ ├── schema.pbtxt
│ │ │ ├── taxi_preprocess_bq.py
│ │ │ └── transform/
│ │ │ └── setup.py
│ │ ├── kubeflow/
│ │ │ ├── containers/
│ │ │ │ ├── launcher/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ ├── tf-serving/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ ├── tf-serving-gh/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── trainer/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── launcher/
│ │ │ │ ├── train.py
│ │ │ │ └── train.template.yaml
│ │ │ ├── taxi_model/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data/
│ │ │ │ │ ├── eval/
│ │ │ │ │ │ └── data.csv
│ │ │ │ │ └── train/
│ │ │ │ │ └── data.csv
│ │ │ │ ├── schema.pbtxt
│ │ │ │ ├── setup.py
│ │ │ │ └── trainer/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── model.py
│ │ │ │ ├── task.py
│ │ │ │ └── taxi.py
│ │ │ ├── tf-serving/
│ │ │ │ ├── chicago_taxi_client.py
│ │ │ │ ├── deploy-tf-serve.py
│ │ │ │ ├── schema.pbtxt
│ │ │ │ └── tf-serve-template.yaml
│ │ │ └── tf-serving-gh/
│ │ │ ├── deploy-tf-serve.py
│ │ │ └── tf-serve-template.yaml
│ │ └── t2t/
│ │ ├── containers/
│ │ │ ├── base/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── t2t_app/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── t2t_proc/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ ├── t2t_train/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── webapp-launcher/
│ │ │ ├── Dockerfile
│ │ │ └── build.sh
│ │ ├── t2t-app/
│ │ │ └── app/
│ │ │ ├── ghsumm/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── setup.py
│ │ │ │ └── trainer/
│ │ │ │ ├── __init__.py
│ │ │ │ └── problem.py
│ │ │ ├── github_issues_sample.csv
│ │ │ ├── main.py
│ │ │ └── templates/
│ │ │ └── index.html
│ │ ├── t2t-proc/
│ │ │ └── ghsumm/
│ │ │ ├── __init__.py
│ │ │ ├── setup.py
│ │ │ └── trainer/
│ │ │ ├── __init__.py
│ │ │ └── problem.py
│ │ ├── t2t-train/
│ │ │ ├── ghsumm/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── setup.py
│ │ │ │ └── trainer/
│ │ │ │ ├── __init__.py
│ │ │ │ └── problem.py
│ │ │ └── train_model.py
│ │ └── webapp-launcher/
│ │ ├── deploy-webapp.py
│ │ └── t2tapp-template.yaml
│ ├── keras_tuner/
│ │ ├── README.md
│ │ ├── components/
│ │ │ ├── eval_metrics_component.yaml
│ │ │ ├── kubeflow-resources/
│ │ │ │ ├── bikesw_training/
│ │ │ │ │ ├── bikes_weather_limited.py
│ │ │ │ │ ├── bw_hptune_standalone.py
│ │ │ │ │ ├── bwmodel/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── model.py
│ │ │ │ │ ├── deploy_tuner.py
│ │ │ │ │ ├── eval_metrics.py
│ │ │ │ │ ├── kchief_deployment_templ.yaml
│ │ │ │ │ └── ktuners_deployment_templ.yaml
│ │ │ │ ├── cloudbuild.yaml
│ │ │ │ ├── containers/
│ │ │ │ │ ├── bikesw_training/
│ │ │ │ │ │ ├── Dockerfile
│ │ │ │ │ │ ├── build.sh
│ │ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ │ └── copydir.sh
│ │ │ │ │ ├── bikesw_training_hptune/
│ │ │ │ │ │ ├── Dockerfile
│ │ │ │ │ │ ├── build.sh
│ │ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ │ └── copydir.sh
│ │ │ │ │ ├── deploy_jobs/
│ │ │ │ │ │ ├── Dockerfile
│ │ │ │ │ │ ├── build.sh
│ │ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ │ └── copydir.sh
│ │ │ │ │ └── tf-serving/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ ├── build.sh
│ │ │ │ │ ├── cloudbuild.yaml
│ │ │ │ │ └── copydir.sh
│ │ │ │ └── tf-serving/
│ │ │ │ ├── deploy-tfserve.py
│ │ │ │ └── tf-serve-template.yaml
│ │ │ ├── serve_component.yaml
│ │ │ ├── tfdv/
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── tfdv.py
│ │ │ │ └── tfdv_compare.py
│ │ │ ├── tfdv_component.yaml
│ │ │ ├── tfdv_drift_component.yaml
│ │ │ └── train_component.yaml
│ │ ├── example_pipelines/
│ │ │ ├── bw_ktune.py
│ │ │ ├── bw_ktune_metrics.py
│ │ │ ├── bw_tfdv.py
│ │ │ ├── bw_train.py
│ │ │ └── bw_train_metrics.py
│ │ └── notebooks/
│ │ └── metrics_eval_component.ipynb
│ ├── samples/
│ │ ├── automl/
│ │ │ ├── README.md
│ │ │ └── dataset_and_train.py
│ │ └── kubeflow-tf/
│ │ ├── README.md
│ │ └── older/
│ │ ├── README.md
│ │ ├── gh_summ.py
│ │ ├── gh_summ_serve.py
│ │ ├── pipelines-kubecon.ipynb
│ │ ├── workflow1.py
│ │ └── workflow2.py
│ └── sbtb/
│ ├── README.md
│ ├── components/
│ │ ├── kubeflow-resources/
│ │ │ ├── bikesw_training/
│ │ │ │ └── bikes_weather.py
│ │ │ ├── containers/
│ │ │ │ ├── bikesw_training/
│ │ │ │ │ ├── Dockerfile
│ │ │ │ │ └── build.sh
│ │ │ │ └── tf-serving/
│ │ │ │ ├── Dockerfile
│ │ │ │ └── build.sh
│ │ │ └── tf-serving/
│ │ │ ├── deploy-tfserve.py
│ │ │ └── tf-serve-template.yaml
│ │ ├── serve_component.yaml
│ │ └── train_component.yaml
│ └── example_pipelines/
│ └── bw.py
├── notebook_examples/
│ ├── TF_linear_regressor.ipynb
│ ├── caipp/
│ │ ├── caipp_connect.ipynb
│ │ └── kfp_in_a_notebook.ipynb
│ ├── functions/
│ │ ├── hosted_kfp_gcf.ipynb
│ │ ├── main.py
│ │ └── requirements.txt
│ ├── hosted_kfp/
│ │ └── event_triggered_kfp_pipeline_bw.ipynb
│ ├── keras_linear_regressor.ipynb
│ └── mnist_estimator.ipynb
└── vertex_pipelines/
└── pytorch/
└── cifar/
├── Dockerfile
├── Dockerfile-gpu
├── Dockerfile-gpu-ct
├── LICENSE
├── README.md
├── input.json
├── pytorch-pipeline/
│ ├── .gitignore
│ ├── README.md
│ ├── cifar10_datamodule.py
│ ├── cifar10_pytorch.py
│ ├── cifar10_train.py
│ ├── process_test.py
│ ├── pytorch_pipeline/
│ │ ├── Dockerfile
│ │ ├── __init__.py
│ │ ├── components/
│ │ │ ├── base/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_component.py
│ │ │ │ └── base_executor.py
│ │ │ └── trainer/
│ │ │ ├── __init__.py
│ │ │ ├── component.py
│ │ │ ├── executor.py
│ │ │ └── generic_executor.py
│ │ └── examples/
│ │ ├── __init__.py
│ │ └── cifar10/
│ │ ├── cifar10_datamodule.py
│ │ ├── cifar10_pre_process.py
│ │ ├── cifar10_pytorch.py
│ │ ├── cifar10_train.py
│ │ ├── input.json
│ │ └── utils.py
│ └── training_task.py
├── pytorch_cifar10_vertex_pipelines.ipynb
└── requirements.txt
SYMBOL INDEX (237 symbols across 68 files)
FILE: cloud_run/twilio_vision/src/whats_that.py
function receive_message (line 46) | def receive_message():
function construct_message (line 84) | def construct_message(labels, face_annotations, logos):
function extract_sentiment (line 130) | def extract_sentiment(emotions):
function get_labels (line 148) | def get_labels(image, num_retries=3, max_labels=3, max_faces=10, max_log...
FILE: ml/automl/tables/kfp_e2e/create_dataset_for_tables/tables_component.py
function automl_create_dataset_for_tables (line 18) | def automl_create_dataset_for_tables(
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_component.py
function automl_create_model_for_tables (line 18) | def automl_create_model_for_tables(
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_component.py
function automl_eval_tables_model (line 19) | def automl_eval_tables_model(
FILE: ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_metrics_component.py
function automl_eval_metrics (line 21) | def automl_eval_metrics(
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/convert_oss.py
function import_to_tensorboard (line 30) | def import_to_tensorboard(saved_model, output_dir):
function main (line 56) | def main(argv):
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/exported_model_deploy.py
function main (line 21) | def main():
FILE: ml/automl/tables/kfp_e2e/deploy_model_for_tables/tables_deploy_component.py
function automl_deploy_tables_model (line 17) | def automl_deploy_tables_model(
FILE: ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_component.py
function automl_import_data_for_tables (line 18) | def automl_import_data_for_tables(
FILE: ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_schema_component.py
function automl_set_dataset_schema (line 19) | def automl_set_dataset_schema(
FILE: ml/automl/tables/kfp_e2e/tables_pipeline_caip.py
function automl_tables (line 49) | def automl_tables( #pylint: disable=unused-argument
FILE: ml/automl/tables/kfp_e2e/tables_pipeline_kf.py
function automl_tables (line 49) | def automl_tables( #pylint: disable=unused-argument
FILE: ml/automl/tables/model_export/convert_oss.py
function import_to_tensorboard (line 29) | def import_to_tensorboard(saved_model, output_dir):
function main (line 55) | def main(argv):
FILE: ml/census_train_and_eval/trainer/model.py
function build_estimator (line 89) | def build_estimator(config, embedding_size=8, hidden_units=None):
function parse_label_column (line 176) | def parse_label_column(label_string_tensor):
function csv_serving_input_fn (line 198) | def csv_serving_input_fn():
function example_serving_input_fn (line 209) | def example_serving_input_fn():
function json_serving_input_fn (line 225) | def json_serving_input_fn():
function parse_csv (line 241) | def parse_csv(rows_string_tensor):
function input_fn (line 258) | def input_fn(filenames,
FILE: ml/census_train_and_eval/trainer/task.py
function run_experiment (line 23) | def run_experiment(hparams):
FILE: ml/kubeflow-pipelines/components/automl/dataset_train/dataset_model.py
function create_dataset (line 35) | def create_dataset(project_id, compute_region, dataset_name, multilabel=...
function import_data (line 74) | def import_data(project_id, compute_region, dataset_id, csv_path):
function create_model (line 97) | def create_model(
function main (line 127) | def main(argv=None):
FILE: ml/kubeflow-pipelines/components/cmle/deploy/deploy_model.py
function main (line 24) | def main(argv=None):
FILE: ml/kubeflow-pipelines/components/older/dataflow/taxi_schema/taxi_schema/taxi_schema.py
function transformed_name (line 81) | def transformed_name(key):
function transformed_names (line 85) | def transformed_names(keys):
function get_raw_feature_spec (line 90) | def get_raw_feature_spec(schema):
function make_proto_coder (line 94) | def make_proto_coder(schema):
function make_csv_coder (line 100) | def make_csv_coder(schema):
function clean_raw_data_dict (line 107) | def clean_raw_data_dict(input_dict, raw_feature_spec):
function make_sql (line 119) | def make_sql(table_name, max_rows=None, for_eval=False):
function read_schema (line 167) | def read_schema(path):
FILE: ml/kubeflow-pipelines/components/older/dataflow/tfma/model_analysis-taxi.py
function run_tfma (line 77) | def run_tfma(slice_spec, eval_model_base_dir, tfma_run_dir, input_csv,
function parse_arguments (line 162) | def parse_arguments():
function main (line 192) | def main():
FILE: ml/kubeflow-pipelines/components/older/dataflow/tft/mcsv_coder.py
function _utf8 (line 32) | def _utf8(s):
function _to_string (line 39) | def _to_string(x):
function _make_cast_fn (line 44) | def _make_cast_fn(dtype):
function _decode_with_reader (line 78) | def _decode_with_reader(value, reader):
class _FixedLenFeatureHandler (line 96) | class _FixedLenFeatureHandler(object):
method __init__ (line 104) | def __init__(self, name, feature_spec, index, reader=None, encoder=None):
method name (line 124) | def name(self):
method parse_value (line 127) | def parse_value(self, string_list):
method encode_value (line 159) | def encode_value(self, string_list, values):
class _VarLenFeatureHandler (line 182) | class _VarLenFeatureHandler(object):
method __init__ (line 190) | def __init__(self, name, feature_spec, index, reader=None, encoder=None):
method name (line 198) | def name(self):
method parse_value (line 201) | def parse_value(self, string_list):
method encode_value (line 212) | def encode_value(self, string_list, values):
class _SparseFeatureHandler (line 220) | class _SparseFeatureHandler(object):
method __init__ (line 227) | def __init__(self, name, feature_spec, value_index, index_index,
method name (line 240) | def name(self):
method parse_value (line 243) | def parse_value(self, string_list):
method encode_value (line 279) | def encode_value(self, string_list, sparse_value):
class DecodeError (line 297) | class DecodeError(Exception):
class EncodeError (line 302) | class EncodeError(Exception):
class _LineGenerator (line 307) | class _LineGenerator(object):
method __init__ (line 310) | def __init__(self):
method push_line (line 313) | def push_line(self, line):
method __iter__ (line 318) | def __iter__(self):
method next (line 321) | def next(self):
class CsvCoder (line 336) | class CsvCoder(object):
class _ReaderWrapper (line 339) | class _ReaderWrapper(object):
method __init__ (line 342) | def __init__(self, delimiter):
method read_record (line 347) | def read_record(self, x):
method __getstate__ (line 351) | def __getstate__(self):
method __setstate__ (line 354) | def __setstate__(self, state):
class _WriterWrapper (line 357) | class _WriterWrapper(object):
method __init__ (line 360) | def __init__(self, delimiter):
method encode_record (line 376) | def encode_record(self, record):
method __getstate__ (line 384) | def __getstate__(self):
method __setstate__ (line 387) | def __setstate__(self, state):
method __init__ (line 390) | def __init__(self, column_names, schema, delimiter=',',
method __reduce__ (line 461) | def __reduce__(self):
method encode (line 468) | def encode(self, instance):
method decode (line 490) | def decode(self, csv_string):
FILE: ml/kubeflow-pipelines/components/older/dataflow/tft/preprocessing.py
function _fill_in_missing (line 50) | def _fill_in_missing(x):
function preprocessing_fn (line 69) | def preprocessing_fn(inputs):
FILE: ml/kubeflow-pipelines/components/older/dataflow/tft/preprocessing2.py
function _fill_in_missing (line 50) | def _fill_in_missing(x):
function preprocessing_fn (line 69) | def preprocessing_fn(inputs):
FILE: ml/kubeflow-pipelines/components/older/dataflow/tft/taxi_preprocess_bq.py
function make_mcsv_coder (line 44) | def make_mcsv_coder(schema):
function _fill_in_missing (line 50) | def _fill_in_missing(x):
function make_sql (line 68) | def make_sql(table_name, ts1, ts2, stage, max_rows=None, for_eval=False):
function transform_data (line 127) | def transform_data(input_handle,
function main (line 284) | def main():
FILE: ml/kubeflow-pipelines/components/older/kubeflow/launcher/train.py
function _generate_train_yaml (line 35) | def _generate_train_yaml(src_filename, tfjob_ns, workers, pss, args_list):
function main (line 61) | def main(argv=None):
FILE: ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/model.py
function build_estimator (line 28) | def build_estimator(tf_transform_dir, config, hidden_units=None):
function example_serving_receiver_fn (line 76) | def example_serving_receiver_fn(tf_transform_dir, schema):
function eval_input_receiver_fn (line 103) | def eval_input_receiver_fn(tf_transform_dir, schema):
function _gzip_reader_fn (line 148) | def _gzip_reader_fn():
function input_fn (line 155) | def input_fn(filenames, tf_transform_dir, batch_size=200):
FILE: ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/task.py
function train_and_maybe_evaluate (line 42) | def train_and_maybe_evaluate(train_files, eval_files, hparams):
function run_experiment (line 99) | def run_experiment(train_files, eval_files, hparams):
FILE: ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/taxi.py
function transformed_name (line 81) | def transformed_name(key):
function transformed_names (line 85) | def transformed_names(keys):
function get_raw_feature_spec (line 90) | def get_raw_feature_spec(schema):
function make_proto_coder (line 94) | def make_proto_coder(schema):
function make_csv_coder (line 100) | def make_csv_coder(schema):
function clean_raw_data_dict (line 107) | def clean_raw_data_dict(input_dict, raw_feature_spec):
function make_sql (line 119) | def make_sql(table_name, max_rows=None, for_eval=False):
function read_schema (line 167) | def read_schema(path):
FILE: ml/kubeflow-pipelines/components/older/kubeflow/tf-serving-gh/deploy-tf-serve.py
function main (line 26) | def main():
FILE: ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/chicago_taxi_client.py
function _do_local_inference (line 37) | def _do_local_inference(host, port, serialized_examples, model_name):
function _do_mlengine_inference (line 57) | def _do_mlengine_inference(model, version, serialized_examples):
function _do_inference (line 76) | def _do_inference(model_handle, examples_file, num_examples, schema, mod...
function main (line 128) | def main(_):
FILE: ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/deploy-tf-serve.py
function main (line 30) | def main(argv=None):
FILE: ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/trainer/problem.py
class GhProblem (line 9) | class GhProblem(text_problems.Text2TextProblem):
method approx_vocab_size (line 13) | def approx_vocab_size(self):
method is_generate_per_split (line 17) | def is_generate_per_split(self):
method max_subtoken_length (line 22) | def max_subtoken_length(self):
method dataset_splits (line 26) | def dataset_splits(self):
method generate_samples (line 37) | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint...
FILE: ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/main.py
function get_issue_body (line 62) | def get_issue_body(issue_url):
function index (line 76) | def index():
function random_github_issue (line 80) | def random_github_issue():
function summary (line 92) | def summary():
function init (line 120) | def init():
function make_tfserving_rest_request_fn (line 131) | def make_tfserving_rest_request_fn():
function server_error (line 155) | def server_error(e):
FILE: ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/trainer/problem.py
class GhProblem (line 9) | class GhProblem(text_problems.Text2TextProblem):
method approx_vocab_size (line 13) | def approx_vocab_size(self):
method is_generate_per_split (line 17) | def is_generate_per_split(self):
method max_subtoken_length (line 22) | def max_subtoken_length(self):
method dataset_splits (line 26) | def dataset_splits(self):
method generate_samples (line 37) | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint...
FILE: ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/trainer/problem.py
class GhProblem (line 9) | class GhProblem(text_problems.Text2TextProblem):
method approx_vocab_size (line 13) | def approx_vocab_size(self):
method is_generate_per_split (line 17) | def is_generate_per_split(self):
method max_subtoken_length (line 22) | def max_subtoken_length(self):
method dataset_splits (line 26) | def dataset_splits(self):
method generate_samples (line 37) | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint...
FILE: ml/kubeflow-pipelines/components/older/t2t/t2t-train/train_model.py
function main (line 22) | def main():
FILE: ml/kubeflow-pipelines/components/older/t2t/webapp-launcher/deploy-webapp.py
function main (line 23) | def main():
FILE: ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bikes_weather_limited.py
function create_model (line 35) | def create_model(learning_rate, hidden_size, num_hidden_layers):
function main (line 60) | def main():
FILE: ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bw_hptune_standalone.py
function create_model (line 35) | def create_model(hp):
function main (line 60) | def main():
FILE: ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bwmodel/model.py
function load_dataset (line 31) | def load_dataset(pattern, batch_size=1):
function features_and_labels (line 34) | def features_and_labels(features):
function read_dataset (line 39) | def read_dataset(pattern, batch_size, mode=tf.estimator.ModeKeys.TRAIN, ...
function get_layers (line 51) | def get_layers():
function wide_and_deep_classifier (line 96) | def wide_and_deep_classifier(inputs, linear_feature_columns, dnn_feature...
FILE: ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/deploy_tuner.py
function main (line 27) | def main():
FILE: ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/eval_metrics.py
function eval_metrics (line 21) | def eval_metrics(
FILE: ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/tf-serving/deploy-tfserve.py
function main (line 26) | def main():
FILE: ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv.py
function generate_tfdv_stats (line 18) | def generate_tfdv_stats(input_data: str, output_path: str, job_name: str...
FILE: ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv_compare.py
function tfdv_detect_drift (line 18) | def tfdv_detect_drift(
FILE: ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune.py
function bikes_weather_hptune (line 38) | def bikes_weather_hptune( #pylint: disable=unused-argument
FILE: ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune_metrics.py
function bikes_weather_hptune (line 41) | def bikes_weather_hptune( #pylint: disable=unused-argument
FILE: ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_tfdv.py
function bikes_weather_tfdv (line 48) | def bikes_weather_tfdv(
FILE: ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train.py
function bikes_weather (line 38) | def bikes_weather( #pylint: disable=unused-argument
FILE: ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train_metrics.py
function bikes_weather_metrics (line 41) | def bikes_weather_metrics( #pylint: disable=unused-argument
FILE: ml/kubeflow-pipelines/samples/automl/dataset_and_train.py
function automl1 (line 26) | def automl1( #pylint: disable=unused-argument
FILE: ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py
function gh_summ (line 24) | def gh_summ( #pylint: disable=unused-argument
FILE: ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py
function gh_summ (line 22) | def gh_summ(
FILE: ml/kubeflow-pipelines/samples/kubeflow-tf/older/workflow1.py
function workflow1 (line 24) | def workflow1(
FILE: ml/kubeflow-pipelines/samples/kubeflow-tf/older/workflow2.py
function workflow2 (line 26) | def workflow2(
FILE: ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/bikesw_training/bikes_weather.py
function load_dataset (line 48) | def load_dataset(pattern, batch_size=1):
function features_and_labels (line 51) | def features_and_labels(features):
function read_dataset (line 56) | def read_dataset(pattern, batch_size, mode=tf.estimator.ModeKeys.TRAIN, ...
function wide_and_deep_classifier (line 70) | def wide_and_deep_classifier(inputs, linear_feature_columns, dnn_feature...
function create_model (line 86) | def create_model(learning_rate, load_checkpoint):
function main (line 155) | def main():
FILE: ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/tf-serving/deploy-tfserve.py
function main (line 26) | def main():
FILE: ml/kubeflow-pipelines/sbtb/example_pipelines/bw.py
function bikes_weather (line 40) | def bikes_weather( #pylint: disable=unused-argument
FILE: ml/notebook_examples/functions/main.py
function sequential_pipeline (line 20) | def sequential_pipeline(filename='gs://ml-pipeline-playground/shakespear...
function get_access_token (line 35) | def get_access_token():
function hosted_kfp_test (line 42) | def hosted_kfp_test(data, context):
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/cifar10_datamodule.py
class CIFAR10DataModule (line 44) | class CIFAR10DataModule(pl.LightningDataModule):
method __init__ (line 45) | def __init__(self, **kwargs):
method prepare_data (line 75) | def prepare_data(self):
method getNumFiles (line 81) | def getNumFiles(input_path):
method setup (line 84) | def setup(self, stage=None):
method create_data_loader (line 141) | def create_data_loader(self, dataset, batch_size, num_workers):
method train_dataloader (line 144) | def train_dataloader(self):
method val_dataloader (line 155) | def val_dataloader(self):
method test_dataloader (line 166) | def test_dataloader(self):
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/cifar10_train.py
class CIFAR10Classifier (line 25) | class CIFAR10Classifier(pl.LightningModule):
method __init__ (line 26) | def __init__(self, **kwargs):
method forward (line 49) | def forward(self, x):
method training_step (line 53) | def training_step(self, train_batch, batch_idx):
method test_step (line 68) | def test_step(self, test_batch, batch_idx):
method validation_step (line 86) | def validation_step(self, val_batch, batch_idx):
method configure_optimizers (line 101) | def configure_optimizers(self):
method makegrid (line 120) | def makegrid(self, output, numrows):
method showActivations (line 139) | def showActivations(self, x):
method training_epoch_end (line 151) | def training_epoch_end(self, outputs):
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/base_component.py
class BaseComponent (line 4) | class BaseComponent(with_metaclass(abc.ABCMeta, object)):
method __init__ (line 5) | def __init__(self):
method _validate_component_class (line 9) | def _validate_component_class(cls):
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/base_executor.py
class BaseExecutor (line 5) | class BaseExecutor(with_metaclass(abc.ABCMeta, object)):
method __init__ (line 7) | def __init__(self):
method Do (line 11) | def Do(self, model_class, data_module_class=None, data_module_args=Non...
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/component.py
class Trainer (line 22) | class Trainer(BaseComponent):
method __init__ (line 23) | def __init__(self,
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/executor.py
class Executor (line 21) | class Executor(GenericExecutor):
method __init__ (line 22) | def __init__(self):
method Do (line 25) | def Do(
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/generic_executor.py
class GenericExecutor (line 17) | class GenericExecutor(BaseExecutor):
method Do (line 19) | def Do(self, model_class, data_module_class=None, data_module_args=Non...
method _GetFnArgs (line 23) | def _GetFnArgs(self):
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/cifar10/cifar10_datamodule.py
class CIFAR10DataModule (line 30) | class CIFAR10DataModule(pl.LightningDataModule):
method __init__ (line 31) | def __init__(self, **kwargs):
method prepare_data (line 61) | def prepare_data(self):
method getNumFiles (line 67) | def getNumFiles(input_path):
method setup (line 70) | def setup(self, stage=None):
method create_data_loader (line 127) | def create_data_loader(self, dataset, batch_size, num_workers):
method train_dataloader (line 130) | def train_dataloader(self):
method val_dataloader (line 141) | def val_dataloader(self):
method test_dataloader (line 152) | def test_dataloader(self):
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/cifar10/cifar10_train.py
class CIFAR10Classifier (line 11) | class CIFAR10Classifier(pl.LightningModule):
method __init__ (line 12) | def __init__(self, **kwargs):
method forward (line 35) | def forward(self, x):
method training_step (line 39) | def training_step(self, train_batch, batch_idx):
method test_step (line 54) | def test_step(self, test_batch, batch_idx):
method validation_step (line 72) | def validation_step(self, val_batch, batch_idx):
method configure_optimizers (line 87) | def configure_optimizers(self):
method makegrid (line 106) | def makegrid(self, output, numrows):
method showActivations (line 125) | def showActivations(self, x):
method training_epoch_end (line 137) | def training_epoch_end(self, outputs):
FILE: ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/cifar10/utils.py
class Visualization (line 10) | class Visualization:
method __init__ (line 11) | def __init__(self):
method _generate_confusion_matrix_metadata (line 14) | def _generate_confusion_matrix_metadata(self, confusion_matrix_path, v...
method _write_ui_metadata (line 32) | def _write_ui_metadata(self, metadata_filepath, metadata_dict, key="ou...
method _enable_tensorboard_visualization (line 45) | def _enable_tensorboard_visualization(self, tensorboard_root):
method _visualize_accuracy_metric (line 60) | def _visualize_accuracy_metric(self, accuracy):
method _generate_confusion_matrix (line 70) | def _generate_confusion_matrix(self, confusion_matrix_dict):
method generate_visualization (line 100) | def generate_visualization(
Condensed preview — 232 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (5,990K chars).
[
{
"path": "LICENSE",
"chars": 11325,
"preview": "Apache License\n Version 2.0, January 2004\n http://www.apache.org/licens"
},
{
"path": "README.md",
"chars": 183,
"preview": "\nThis is a repo for small Google Cloud Platform (GCP) snippets and examples used in blog posts etc.\n\nContributions are n"
},
{
"path": "cloud_run/twilio_vision/Dockerfile",
"chars": 880,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "cloud_run/twilio_vision/README.md",
"chars": 3634,
"preview": "\n# Cloud Run example: 'Twilio-vision'\n\nThis directory contains a simple [Cloud Run](https://cloud.google.com/run/docs/) "
},
{
"path": "cloud_run/twilio_vision/src/requirements.txt",
"chars": 69,
"preview": "google-api-python-client\noauth2client\ngunicorn\nflask\ntwilio\nrequests\n"
},
{
"path": "cloud_run/twilio_vision/src/whats_that.py",
"chars": 7176,
"preview": "#!/usr/bin/env python\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2"
},
{
"path": "datalab/facets/README.md",
"chars": 15,
"preview": "\n[To be added.]"
},
{
"path": "datalab/facets/facets_snippets.ipynb",
"chars": 2652,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"```\\n\",\n \"Copyright 2017 Google "
},
{
"path": "ml/README.md",
"chars": 179,
"preview": "\nThis directory contains various ML-related examples.\n\n(The Cloud Shell tutorials have moved [here](https://github.com/G"
},
{
"path": "ml/automl/tables/kfp_e2e/README.md",
"chars": 35835,
"preview": "<!-- START doctoc generated TOC please keep comment here to allow auto update -->\n\n\n\n# AutoML Tables: end-to-end workflo"
},
{
"path": "ml/automl/tables/kfp_e2e/create_dataset_for_tables/tables_component.py",
"chars": 3142,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/create_dataset_for_tables/tables_component.yaml",
"chars": 5575,
"preview": "name: Automl create dataset for tables\ninputs:\n- name: gcp_project_id\n type: String\n- name: gcp_region\n type: String\n-"
},
{
"path": "ml/automl/tables/kfp_e2e/create_model_for_tables/tables_component.py",
"chars": 3584,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/create_model_for_tables/tables_component.yaml",
"chars": 7899,
"preview": "name: Automl create model for tables\ninputs:\n- name: gcp_project_id\n type: String\n- name: gcp_region\n type: String\n- n"
},
{
"path": "ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_component.py",
"chars": 6806,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_component.yaml",
"chars": 10527,
"preview": "name: Automl eval tables model\ninputs:\n- name: gcp_project_id\n type: String\n- name: gcp_region\n type: String\n- name: m"
},
{
"path": "ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_metrics_component.py",
"chars": 7485,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/create_model_for_tables/tables_eval_metrics_component.yaml",
"chars": 10068,
"preview": "name: Automl eval metrics\ninputs:\n- name: eval_data\n type: evals\n- name: thresholds\n type: String\n default: '{\"mean_a"
},
{
"path": "ml/automl/tables/kfp_e2e/deploy_model_for_tables/convert_oss.py",
"chars": 2102,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/automl/tables/kfp_e2e/deploy_model_for_tables/exported_model_deploy.py",
"chars": 1980,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/automl/tables/kfp_e2e/deploy_model_for_tables/instances.json",
"chars": 1484,
"preview": "{\n \"instances\": [\n {\n \"bike_id\": \"6179\",\n \"day_of_week\": \"6\",\n \"end_latitude\": 51.50379168,\n \"en"
},
{
"path": "ml/automl/tables/kfp_e2e/deploy_model_for_tables/model_serve_template.yaml",
"chars": 949,
"preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n labels:\n app: MODEL_NAME\n name: MODEL_NAME\n namespace: NAMESPACE\nspec:"
},
{
"path": "ml/automl/tables/kfp_e2e/deploy_model_for_tables/tables_deploy_component.py",
"chars": 2946,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/deploy_model_for_tables/tables_deploy_component.yaml",
"chars": 4835,
"preview": "name: Automl deploy tables model\ninputs:\n- name: gcp_project_id\n type: String\n- name: gcp_region\n type: String\n- name:"
},
{
"path": "ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_component.py",
"chars": 3697,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_component.yaml",
"chars": 5807,
"preview": "name: Automl import data for tables\ninputs:\n- name: path\n type: String\n- name: gcp_project_id\n type: String\n- name: gc"
},
{
"path": "ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_schema_component.py",
"chars": 4251,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/import_data_from_bigquery/tables_schema_component.yaml",
"chars": 7391,
"preview": "name: Automl set dataset schema\ninputs:\n- name: gcp_project_id\n type: String\n- name: gcp_region\n type: String\n- name: "
},
{
"path": "ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/Dockerfile",
"chars": 1823,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/build.sh",
"chars": 1021,
"preview": "#!/bin/bash -e\n# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/automl/tables/kfp_e2e/tables_pipeline_caip.py",
"chars": 4707,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/kfp_e2e/tables_pipeline_kf.py",
"chars": 4929,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/automl/tables/model_export/Dockerfile.template",
"chars": 714,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/automl/tables/model_export/automl_tables_model_export_cloud_run.md",
"chars": 17760,
"preview": "\n# AutoML Tables: Exporting and serving your trained model to Cloud Run\n\n## Introduction\n\nGoogle Cloud’s [AutoML Tables]"
},
{
"path": "ml/automl/tables/model_export/convert_oss.py",
"chars": 2098,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/automl/tables/model_export/instances.json",
"chars": 1484,
"preview": "{\n \"instances\": [\n {\n \"bike_id\": \"6179\",\n \"day_of_week\": \"6\",\n \"end_latitude\": 51.50379168,\n \"en"
},
{
"path": "ml/automl/tables/xai/README.md",
"chars": 1494,
"preview": "\n# AutoML Tables examples\n\nThis directory contains a notebook that shows examples of using the [AutoML Tables](https://c"
},
{
"path": "ml/automl/tables/xai/automl_tables_xai.ipynb",
"chars": 31503,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"## Introduction\\n\",\n \"\\n\",\n \""
},
{
"path": "ml/automl/tables/xai/bigquery_examples.md",
"chars": 2165,
"preview": "\n# Examples of inspecting the \"London bikes and weather\" test dataset in BigQuery\n\n\nAutoML Tables allows you to [export "
},
{
"path": "ml/census_train_and_eval/README.md",
"chars": 25924,
"preview": "\n# Easy distributed training with TensorFlow using `tf.estimator.train_and_evaluate` and Cloud ML Engine\n\n## Introductio"
},
{
"path": "ml/census_train_and_eval/config_custom_gpus.yaml",
"chars": 166,
"preview": "trainingInput:\n scaleTier: CUSTOM\n masterType: standard_p100\n workerType: standard_p100\n parameterServerType: standa"
},
{
"path": "ml/census_train_and_eval/hptuning_config.yaml",
"chars": 564,
"preview": "trainingInput:\n hyperparameters:\n goal: MAXIMIZE\n hyperparameterMetricTag: accuracy\n maxTrials: 6\n maxParal"
},
{
"path": "ml/census_train_and_eval/test.json",
"chars": 314,
"preview": "{\"age\": 25, \"workclass\": \" Private\", \"education\": \" 11th\", \"education_num\": 7, \"marital_status\": \" Never-married\", \"occu"
},
{
"path": "ml/census_train_and_eval/trainer/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ml/census_train_and_eval/trainer/model.py",
"chars": 11260,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache\n# License, Version 2.0 (the \"License\"); you "
},
{
"path": "ml/census_train_and_eval/trainer/task.py",
"chars": 5258,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache\n# License, Version 2.0 (the \"License\"); you "
},
{
"path": "ml/census_train_and_eval/using_tf.estimator.train_and_evaluate.ipynb",
"chars": 42381,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Easy distributed training with Te"
},
{
"path": "ml/kubeflow-pipelines/README.md",
"chars": 1710,
"preview": "\n# Kubeflow Pipelines examples\n\n[Kubeflow](https://www.kubeflow.org/) is an OSS project to support a machine learning st"
},
{
"path": "ml/kubeflow-pipelines/README_github_summ.md",
"chars": 23802,
"preview": "\n\n**This tutorial is out of date (and probably doesn't work). A newer version is [here](https://github.com/kubeflow/exam"
},
{
"path": "ml/kubeflow-pipelines/README_taxidata_examples.md",
"chars": 16558,
"preview": "\n# (Deprecated) Kubeflow Pipelines examples\n\n**These examples are not currently maintained and are probably no longer wo"
},
{
"path": "ml/kubeflow-pipelines/components/README.md",
"chars": 504,
"preview": "\n# Workflow Components\n\nThis directory contains the definitions of the Argo workflow steps used in the example workflows"
},
{
"path": "ml/kubeflow-pipelines/components/automl/container/Dockerfile",
"chars": 1555,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/automl/container/build.sh",
"chars": 979,
"preview": "#!/bin/bash -e\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/automl/dataset_train/dataset_model.py",
"chars": 15498,
"preview": "#!/usr/bin/env python\n\n# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
},
{
"path": "ml/kubeflow-pipelines/components/cmle/containers/base/Dockerfile",
"chars": 1546,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/cmle/containers/base/build.sh",
"chars": 720,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/cmle/containers/cmle_deploy/Dockerfile",
"chars": 666,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/cmle/containers/cmle_deploy/build.sh",
"chars": 972,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/cmle/deploy/deploy_model.py",
"chars": 2799,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/containers/base/Dockerfile",
"chars": 1713,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/containers/base/build.sh",
"chars": 901,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/containers/tfma/Dockerfile",
"chars": 932,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/containers/tfma/build.sh",
"chars": 957,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/containers/tft/Dockerfile",
"chars": 931,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/containers/tft/build.sh",
"chars": 1033,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/taxi_schema/taxi_schema/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/taxi_schema/taxi_schema/taxi_schema.py",
"chars": 5338,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tfma/analysis/setup.py",
"chars": 1066,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tfma/model_analysis-taxi.py",
"chars": 7600,
"preview": "#!/bin/env python\n\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tfma/tfma_expers.ipynb",
"chars": 5489,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tft/mcsv_coder.py",
"chars": 18894,
"preview": "# Copyright 2017 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tft/preprocessing.py",
"chars": 3764,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tft/preprocessing2.py",
"chars": 3758,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tft/schema.pbtxt",
"chars": 4365,
"preview": "feature {\n name: \"fare\"\n value_count {\n min: 1\n max: 1\n }\n type: FLOAT\n presence {\n min_fraction: 1.0\n "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tft/taxi_preprocess_bq.py",
"chars": 13603,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/dataflow/tft/transform/setup.py",
"chars": 1094,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/launcher/Dockerfile",
"chars": 2372,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/launcher/build.sh",
"chars": 1030,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving/Dockerfile",
"chars": 1929,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving/build.sh",
"chars": 1052,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving-gh/Dockerfile",
"chars": 1929,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving-gh/build.sh",
"chars": 1035,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/trainer/Dockerfile",
"chars": 931,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/containers/trainer/build.sh",
"chars": 1067,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/launcher/train.py",
"chars": 8978,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/launcher/train.template.yaml",
"chars": 2721,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/__init__.py",
"chars": 572,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/data/eval/data.csv",
"chars": 641080,
"preview": "pickup_community_area,fare,trip_start_month,trip_start_hour,trip_start_day,trip_start_timestamp,pickup_latitude,pickup_l"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/data/train/data.csv",
"chars": 1281866,
"preview": "pickup_community_area,fare,trip_start_month,trip_start_hour,trip_start_day,trip_start_timestamp,pickup_latitude,pickup_l"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/schema.pbtxt",
"chars": 4365,
"preview": "feature {\n name: \"fare\"\n value_count {\n min: 1\n max: 1\n }\n type: FLOAT\n presence {\n min_fraction: 1.0\n "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/setup.py",
"chars": 942,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/__init__.py",
"chars": 597,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/model.py",
"chars": 7093,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/task.py",
"chars": 5884,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/taxi.py",
"chars": 5337,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/chicago_taxi_client.py",
"chars": 5382,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/deploy-tf-serve.py",
"chars": 3660,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/schema.pbtxt",
"chars": 4365,
"preview": "feature {\n name: \"fare\"\n value_count {\n min: 1\n max: 1\n }\n type: FLOAT\n presence {\n min_fraction: 1.0\n "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/tf-serve-template.yaml",
"chars": 1267,
"preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n labels:\n app: MODEL_NAME\n name: MODEL_NAME\n namespace: KUBEFLOW_NAMESP"
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/tf-serving-gh/deploy-tf-serve.py",
"chars": 3573,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/kubeflow/tf-serving-gh/tf-serve-template.yaml",
"chars": 1550,
"preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n annotations:\n getambassador.io/config: |-\n ---\n apiVersion: am"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/base/Dockerfile",
"chars": 1657,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/base/build.sh",
"chars": 722,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/t2t_app/Dockerfile",
"chars": 1757,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/t2t_app/build.sh",
"chars": 989,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/t2t_proc/Dockerfile",
"chars": 1763,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/t2t_proc/build.sh",
"chars": 993,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/t2t_train/Dockerfile",
"chars": 664,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/t2t_train/build.sh",
"chars": 976,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/webapp-launcher/Dockerfile",
"chars": 1927,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/containers/webapp-launcher/build.sh",
"chars": 1033,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/setup.py",
"chars": 384,
"preview": "from setuptools import find_packages\nfrom setuptools import setup\n\nREQUIRED_PACKAGES = [\n 'tensor2tensor'\n]\n\nsetup(\n "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/trainer/__init__.py",
"chars": 22,
"preview": "from . import problem\n"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/trainer/problem.py",
"chars": 1355,
"preview": "import csv\n\nfrom tensor2tensor.utils import registry\nfrom tensor2tensor.data_generators import problem\nfrom tensor2tenso"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/github_issues_sample.csv",
"chars": 2699744,
"preview": ",issue_url,issue_title,body\n505832,\"\"\"https://github.com/citra-emu/citra/issues/2736\"\"\",feature request the mouse input "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/main.py",
"chars": 5423,
"preview": "# -*- coding: utf-8 -*-\n\n# Copyright 2018 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\")"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/templates/index.html",
"chars": 4061,
"preview": "<!doctype html>\n<html lang=\"en\">\n <head>\n <!-- Required meta tags -->\n <meta charset=\"utf-8\">\n <meta name=\"vie"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/setup.py",
"chars": 384,
"preview": "from setuptools import find_packages\nfrom setuptools import setup\n\nREQUIRED_PACKAGES = [\n 'tensor2tensor'\n]\n\nsetup(\n "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/trainer/__init__.py",
"chars": 22,
"preview": "from . import problem\n"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/trainer/problem.py",
"chars": 1357,
"preview": "import csv\n\nfrom tensor2tensor.utils import registry\nfrom tensor2tensor.data_generators import problem\nfrom tensor2tenso"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/setup.py",
"chars": 384,
"preview": "from setuptools import find_packages\nfrom setuptools import setup\n\nREQUIRED_PACKAGES = [\n 'tensor2tensor'\n]\n\nsetup(\n "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/trainer/__init__.py",
"chars": 22,
"preview": "from . import problem\n"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/trainer/problem.py",
"chars": 1355,
"preview": "import csv\n\nfrom tensor2tensor.utils import registry\nfrom tensor2tensor.data_generators import problem\nfrom tensor2tenso"
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/t2t-train/train_model.py",
"chars": 2959,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/webapp-launcher/deploy-webapp.py",
"chars": 3090,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/components/older/t2t/webapp-launcher/t2tapp-template.yaml",
"chars": 1244,
"preview": "apiVersion: v1\nkind: Service\nmetadata:\n annotations:\n getambassador.io/config: |-\n ---\n apiVersion: ambass"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/README.md",
"chars": 25939,
"preview": "# Kubeflow Pipelines Distributed Keras Tuner example\n\n> **Note**: this example may take a long time to run, and **incur "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/eval_metrics_component.yaml",
"chars": 2614,
"preview": "name: Eval metrics\ninputs:\n- {name: metrics, type: String}\n- {name: thresholds, type: String}\noutputs:\n- {name: deploy, "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bikes_weather_limited.py",
"chars": 6301,
"preview": "\n# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bw_hptune_standalone.py",
"chars": 5967,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bwmodel/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bwmodel/model.py",
"chars": 5134,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/deploy_tuner.py",
"chars": 6635,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/eval_metrics.py",
"chars": 1921,
"preview": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/kchief_deployment_templ.yaml",
"chars": 1488,
"preview": "---\n apiVersion: v1\n kind: Service\n metadata:\n labels:\n app: KTUNER_CHIEF\n apptype: ktuner-chief\n nam"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/ktuners_deployment_templ.yaml",
"chars": 1067,
"preview": "---\napiVersion: batch/v1\nkind: Job\nmetadata:\n labels:\n app: ktuner-tuner\n name: KTUNER_DEP_NAME\n namespace: NAMESP"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/cloudbuild.yaml",
"chars": 2843,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/Dockerfile",
"chars": 801,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/build.sh",
"chars": 993,
"preview": "#!/bin/bash -e\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/cloudbuild.yaml",
"chars": 1104,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/copydir.sh",
"chars": 651,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/Dockerfile",
"chars": 823,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/build.sh",
"chars": 1018,
"preview": "#!/bin/bash -e\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/cloudbuild.yaml",
"chars": 1171,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/copydir.sh",
"chars": 651,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/Dockerfile",
"chars": 1726,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/build.sh",
"chars": 1011,
"preview": "#!/bin/bash -e\n# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/cloudbuild.yaml",
"chars": 1144,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/copydir.sh",
"chars": 651,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/Dockerfile",
"chars": 1595,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/build.sh",
"chars": 996,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/cloudbuild.yaml",
"chars": 1114,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/copydir.sh",
"chars": 648,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/tf-serving/deploy-tfserve.py",
"chars": 3793,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/tf-serving/tf-serve-template.yaml",
"chars": 1380,
"preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n labels:\n app: SERVICE_NAME\n apptype: tf-serving\n name: SERVICE_NAME\n"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/serve_component.yaml",
"chars": 1133,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/tfdv/Dockerfile",
"chars": 884,
"preview": "# Copyright 2020 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/tfdv/requirements.txt",
"chars": 40,
"preview": "ipython==7.16.1\nipython-genutils==0.2.0\n"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv.py",
"chars": 2924,
"preview": "# Copyright 2021 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv_compare.py",
"chars": 2145,
"preview": "# Copyright 2021 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/tfdv_component.yaml",
"chars": 5878,
"preview": "name: Generate tfdv stats\ninputs:\n- {name: input_data, type: String}\n- {name: output_path, type: String}\n- {name: job_na"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/tfdv_drift_component.yaml",
"chars": 3189,
"preview": "name: Tfdv detect drift\ninputs:\n- {name: stats_older_path, type: String}\n- {name: stats_new_path, type: String}\noutputs:"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/components/train_component.yaml",
"chars": 1839,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune.py",
"chars": 3433,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune_metrics.py",
"chars": 3794,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_tfdv.py",
"chars": 4931,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train.py",
"chars": 2486,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train_metrics.py",
"chars": 2861,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/keras_tuner/notebooks/metrics_eval_component.ipynb",
"chars": 23060,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Keras Tuner KFP example, part II:"
},
{
"path": "ml/kubeflow-pipelines/samples/automl/README.md",
"chars": 735,
"preview": "\nThe pipeline in this directory shows how you can make calls to the AutoML Vision API to build a pipeline that creates a"
},
{
"path": "ml/kubeflow-pipelines/samples/automl/dataset_and_train.py",
"chars": 2375,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/samples/kubeflow-tf/README.md",
"chars": 171,
"preview": "\nThe example pipelines that were in this directory have been moved to the [`older`](./older) subdirectory.\nThey are not "
},
{
"path": "ml/kubeflow-pipelines/samples/kubeflow-tf/older/README.md",
"chars": 4203,
"preview": "\n# Run the example pipelines\n\n**These examples are not currently maintained and are probably out of date**.\n\nSee the top"
},
{
"path": "ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py",
"chars": 2825,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py",
"chars": 1560,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/samples/kubeflow-tf/older/pipelines-kubecon.ipynb",
"chars": 15046,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# KubeFlow Pipeline: Github Issue S"
},
{
"path": "ml/kubeflow-pipelines/samples/kubeflow-tf/older/workflow1.py",
"chars": 9690,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/samples/kubeflow-tf/older/workflow2.py",
"chars": 9228,
"preview": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/sbtb/README.md",
"chars": 6109,
"preview": "\n# Kubeflow Pipeline: \"Bikes & Weather\" training + serving\n\n## Introduction\n\nThis Kubeflow pipeline trains a TensorFlow "
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/bikesw_training/bikes_weather.py",
"chars": 10091,
"preview": "\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/bikesw_training/Dockerfile",
"chars": 753,
"preview": "# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/bikesw_training/build.sh",
"chars": 1017,
"preview": "#!/bin/bash -e\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/tf-serving/Dockerfile",
"chars": 1931,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/tf-serving/build.sh",
"chars": 996,
"preview": "#!/bin/bash -e\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/tf-serving/deploy-tfserve.py",
"chars": 3755,
"preview": "# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/tf-serving/tf-serve-template.yaml",
"chars": 2012,
"preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n annotations:\n getambassador.io/config: |-\n ---\n apiVersion: am"
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/serve_component.yaml",
"chars": 1043,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/sbtb/components/train_component.yaml",
"chars": 1464,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/kubeflow-pipelines/sbtb/example_pipelines/bw.py",
"chars": 2188,
"preview": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "ml/notebook_examples/TF_linear_regressor.ipynb",
"chars": 6893,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"Copyright 2017 Google Inc. All Righ"
},
{
"path": "ml/notebook_examples/caipp/caipp_connect.ipynb",
"chars": 5494,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Remote deployment of a Cloud AI P"
},
{
"path": "ml/notebook_examples/caipp/kfp_in_a_notebook.ipynb",
"chars": 31824,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Using AI Platform Pipelines (Host"
},
{
"path": "ml/notebook_examples/functions/hosted_kfp_gcf.ipynb",
"chars": 15021,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Using Google Cloud Functions to s"
},
{
"path": "ml/notebook_examples/functions/main.py",
"chars": 2322,
"preview": "import logging\nimport datetime\nimport logging\nimport time\n \nimport kfp\nimport kfp.compiler as compiler\nimport kfp.dsl as"
},
{
"path": "ml/notebook_examples/functions/requirements.txt",
"chars": 4,
"preview": "kfp\n"
},
{
"path": "ml/notebook_examples/hosted_kfp/event_triggered_kfp_pipeline_bw.ipynb",
"chars": 52823,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {\n \"id\": \"-ZEcVTiB-RNl\"\n },\n \"source\": [\n \"# Even"
},
{
"path": "ml/notebook_examples/keras_linear_regressor.ipynb",
"chars": 2715,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"Copyright 2017 Google Inc. All Righ"
}
]
// ... and 32 more files (download for full content)
About this extraction
This page contains the full source code of the amygdala/code-snippets GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 232 files (5.6 MB), approximately 1.5M tokens, and a symbol index with 237 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.